use crate::{
    arch::asm,
    core_arch::{simd::*, x86::*},
    intrinsics::simd::*,
    mem, ptr,
};

// x86-32 wants to use a 32-bit address size, but asm! defaults to using the full
// register name (e.g. rax). We have to explicitly override the placeholder to
// use the 32-bit register name in that case.

#[cfg(target_pointer_width = "32")]
macro_rules! vpl {
    ($inst:expr) => {
        concat!($inst, ", [{p:e}]")
    };
}
#[cfg(target_pointer_width = "64")]
macro_rules! vpl {
    ($inst:expr) => {
        concat!($inst, ", [{p}]")
    };
}
#[cfg(target_pointer_width = "32")]
macro_rules! vps {
    ($inst1:expr, $inst2:expr) => {
        concat!($inst1, " [{p:e}]", $inst2)
    };
}
#[cfg(target_pointer_width = "64")]
macro_rules! vps {
    ($inst1:expr, $inst2:expr) => {
        concat!($inst1, " [{p}]", $inst2)
    };
}

pub(crate) use {vpl, vps};
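// Illustrative sketch (hypothetical operand names, not a real intrinsic body)
// of how `vpl!` is meant to be spliced into an `asm!` template: it appends the
// memory operand for the `p` pointer placeholder, producing
// "vmovdqu32 {dst}{{{k}}}, [{p}]" on x86-64 and
// "vmovdqu32 {dst}{{{k}}}, [{p:e}]" on x86-32, so one template serves both
// address sizes.
//
//     asm!(
//         vpl!("vmovdqu32 {dst}{{{k}}}"),
//         p = in(reg) mem_addr,
//         k = in(kreg) mask,
//         dst = out(zmm_reg) result,
//         options(pure, readonly, nostack)
//     );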

#[cfg(test)]
use stdarch_test::assert_instr;

/// Computes the absolute values of packed 32-bit integers in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_epi32&expand=39)
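///
/// # Example
///
/// Illustrative sketch only (assumes a CPU with `avx512f` and the unstable
/// `stdarch_x86_avx512` feature; not compiled as a doctest):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi32(-7);
///     // Every lane of `r` now holds 7.
///     let r = _mm512_abs_epi32(a);
/// }
/// ```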
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub unsafe fn _mm512_abs_epi32(a: __m512i) -> __m512i {
    let a: i32x16 = a.as_i32x16();
    // all-0 is a properly initialized i32x16
    let zero: i32x16 = mem::zeroed();
    let sub: i32x16 = simd_sub(zero, a);
    let cmp: i32x16 = simd_gt(a, zero);
    transmute(simd_select(cmp, a, sub))
}

/// Computes the absolute value of packed 32-bit integers in `a`, and stores the
/// unsigned results in `dst` using writemask `k` (elements are copied from
/// `src` when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_epi32&expand=40)
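///
/// # Example
///
/// Illustrative sketch of the writemask behavior (assumes `avx512f` and the
/// unstable `stdarch_x86_avx512` feature; not compiled as a doctest):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi32(-3);
///     let src = _mm512_set1_epi32(100);
///     // Lanes 0..8 (mask bits set) become |-3| = 3; lanes 8..16 keep 100 from `src`.
///     let r = _mm512_mask_abs_epi32(src, 0b00000000_11111111, a);
/// }
/// ```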
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub unsafe fn _mm512_mask_abs_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
    let abs: i32x16 = _mm512_abs_epi32(a).as_i32x16();
    transmute(simd_select_bitmask(k, abs, src.as_i32x16()))
}

/// Computes the absolute value of packed 32-bit integers in `a`, and stores the
/// unsigned results in `dst` using zeromask `k` (elements are zeroed out when
/// the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_abs_epi32&expand=41)
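///
/// # Example
///
/// Illustrative sketch of the zeromask behavior (assumes `avx512f` and the
/// unstable `stdarch_x86_avx512` feature; not compiled as a doctest):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi32(-3);
///     // Lanes 0..8 (mask bits set) become 3; lanes 8..16 are zeroed.
///     let r = _mm512_maskz_abs_epi32(0b00000000_11111111, a);
/// }
/// ```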
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub unsafe fn _mm512_maskz_abs_epi32(k: __mmask16, a: __m512i) -> __m512i {
    let abs: i32x16 = _mm512_abs_epi32(a).as_i32x16();
    let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, abs, zero))
}

/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_abs_epi32&expand=37)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub unsafe fn _mm256_mask_abs_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    let abs: i32x8 = _mm256_abs_epi32(a).as_i32x8();
    transmute(simd_select_bitmask(k, abs, src.as_i32x8()))
}

/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_abs_epi32&expand=38)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub unsafe fn _mm256_maskz_abs_epi32(k: __mmask8, a: __m256i) -> __m256i {
    let abs: i32x8 = _mm256_abs_epi32(a).as_i32x8();
    let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
    transmute(simd_select_bitmask(k, abs, zero))
}

/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_abs_epi32&expand=34)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub unsafe fn _mm_mask_abs_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    let abs: i32x4 = _mm_abs_epi32(a).as_i32x4();
    transmute(simd_select_bitmask(k, abs, src.as_i32x4()))
}

/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_abs_epi32&expand=35)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub unsafe fn _mm_maskz_abs_epi32(k: __mmask8, a: __m128i) -> __m128i {
    let abs: i32x4 = _mm_abs_epi32(a).as_i32x4();
    let zero: i32x4 = _mm_setzero_si128().as_i32x4();
    transmute(simd_select_bitmask(k, abs, zero))
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_epi64&expand=48)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub unsafe fn _mm512_abs_epi64(a: __m512i) -> __m512i {
    let a: i64x8 = a.as_i64x8();
    // all-0 is a properly initialized i64x8
    let zero: i64x8 = mem::zeroed();
    let sub: i64x8 = simd_sub(zero, a);
    let cmp: i64x8 = simd_gt(a, zero);
    transmute(simd_select(cmp, a, sub))
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_epi64&expand=49)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub unsafe fn _mm512_mask_abs_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
    let abs: i64x8 = _mm512_abs_epi64(a).as_i64x8();
    transmute(simd_select_bitmask(k, abs, src.as_i64x8()))
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_abs_epi64&expand=50)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub unsafe fn _mm512_maskz_abs_epi64(k: __mmask8, a: __m512i) -> __m512i {
    let abs: i64x8 = _mm512_abs_epi64(a).as_i64x8();
    let zero: i64x8 = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, abs, zero))
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_abs_epi64&expand=45)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub unsafe fn _mm256_abs_epi64(a: __m256i) -> __m256i {
    let a: i64x4 = a.as_i64x4();
    // all-0 is a properly initialized i64x4
    let zero: i64x4 = mem::zeroed();
    let sub: i64x4 = simd_sub(zero, a);
    let cmp: i64x4 = simd_gt(a, zero);
    transmute(simd_select(cmp, a, sub))
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_abs_epi64&expand=46)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub unsafe fn _mm256_mask_abs_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    let abs: i64x4 = _mm256_abs_epi64(a).as_i64x4();
    transmute(simd_select_bitmask(k, abs, src.as_i64x4()))
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_abs_epi64&expand=45)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub unsafe fn _mm256_maskz_abs_epi64(k: __mmask8, a: __m256i) -> __m256i {
    let abs: i64x4 = _mm256_abs_epi64(a).as_i64x4();
    let zero: i64x4 = _mm256_setzero_si256().as_i64x4();
    transmute(simd_select_bitmask(k, abs, zero))
}

/// Finds the absolute value of each packed single-precision (32-bit) floating-point element in v2, storing the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_ps&expand=65)
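///
/// # Example
///
/// Illustrative sketch (assumes `avx512f` and the unstable
/// `stdarch_x86_avx512` feature; not compiled as a doctest). The absolute
/// value is taken by clearing each element's sign bit:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let v = _mm512_set1_ps(-2.5);
///     // Every lane of `r` now holds 2.5.
///     let r = _mm512_abs_ps(v);
/// }
/// ```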
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpandq))]
pub unsafe fn _mm512_abs_ps(v2: __m512) -> __m512 {
    let a: __m512i = _mm512_set1_epi32(0x7FFFFFFF); // from LLVM code
    let b: __m512i = transmute::<f32x16, __m512i>(v2.as_f32x16());
    let abs: __m512i = _mm512_and_epi32(a, b);
    transmute(abs)
}

/// Finds the absolute value of each packed single-precision (32-bit) floating-point element in v2, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_ps&expand=66)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpandd))]
pub unsafe fn _mm512_mask_abs_ps(src: __m512, k: __mmask16, v2: __m512) -> __m512 {
    let abs: f32x16 = _mm512_abs_ps(v2).as_f32x16();
    transmute(simd_select_bitmask(k, abs, src.as_f32x16()))
}

/// Finds the absolute value of each packed double-precision (64-bit) floating-point element in v2, storing the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_pd&expand=60)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpandq))]
pub unsafe fn _mm512_abs_pd(v2: __m512d) -> __m512d {
    let a: __m512i = _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF); // from LLVM code
    let b: __m512i = transmute::<f64x8, __m512i>(v2.as_f64x8());
    let abs: __m512i = _mm512_and_epi64(a, b);
    transmute(abs)
}

/// Finds the absolute value of each packed double-precision (64-bit) floating-point element in v2, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_pd&expand=61)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpandq))]
pub unsafe fn _mm512_mask_abs_pd(src: __m512d, k: __mmask8, v2: __m512d) -> __m512d {
    let abs: f64x8 = _mm512_abs_pd(v2).as_f64x8();
    transmute(simd_select_bitmask(k, abs, src.as_f64x8()))
}

/// Move packed 32-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_epi32&expand=3801)
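///
/// # Example
///
/// Illustrative sketch of a masked blend between two vectors (assumes
/// `avx512f` and the unstable `stdarch_x86_avx512` feature; not compiled as a
/// doctest):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi32(1);
///     let src = _mm512_set1_epi32(2);
///     // Even-numbered lanes take 1 from `a`, odd-numbered lanes keep 2 from `src`.
///     let r = _mm512_mask_mov_epi32(src, 0b01010101_01010101, a);
/// }
/// ```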
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub unsafe fn _mm512_mask_mov_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
    let mov: i32x16 = a.as_i32x16();
    transmute(simd_select_bitmask(k, mov, src.as_i32x16()))
}

/// Move packed 32-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_epi32&expand=3802)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub unsafe fn _mm512_maskz_mov_epi32(k: __mmask16, a: __m512i) -> __m512i {
    let mov: i32x16 = a.as_i32x16();
    let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, mov, zero))
}

/// Move packed 32-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_epi32&expand=3799)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub unsafe fn _mm256_mask_mov_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    let mov: i32x8 = a.as_i32x8();
    transmute(simd_select_bitmask(k, mov, src.as_i32x8()))
}

/// Move packed 32-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_epi32&expand=3800)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub unsafe fn _mm256_maskz_mov_epi32(k: __mmask8, a: __m256i) -> __m256i {
    let mov: i32x8 = a.as_i32x8();
    let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
    transmute(simd_select_bitmask(k, mov, zero))
}

/// Move packed 32-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_epi32&expand=3797)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub unsafe fn _mm_mask_mov_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    let mov: i32x4 = a.as_i32x4();
    transmute(simd_select_bitmask(k, mov, src.as_i32x4()))
}

/// Move packed 32-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_epi32&expand=3798)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub unsafe fn _mm_maskz_mov_epi32(k: __mmask8, a: __m128i) -> __m128i {
    let mov: i32x4 = a.as_i32x4();
    let zero: i32x4 = _mm_setzero_si128().as_i32x4();
    transmute(simd_select_bitmask(k, mov, zero))
}

/// Move packed 64-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_epi64&expand=3807)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub unsafe fn _mm512_mask_mov_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
    let mov: i64x8 = a.as_i64x8();
    transmute(simd_select_bitmask(k, mov, src.as_i64x8()))
}

/// Move packed 64-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_epi64&expand=3808)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub unsafe fn _mm512_maskz_mov_epi64(k: __mmask8, a: __m512i) -> __m512i {
    let mov: i64x8 = a.as_i64x8();
    let zero: i64x8 = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, mov, zero))
}

/// Move packed 64-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_epi64&expand=3805)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub unsafe fn _mm256_mask_mov_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    let mov: i64x4 = a.as_i64x4();
    transmute(simd_select_bitmask(k, mov, src.as_i64x4()))
}

/// Move packed 64-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_epi64&expand=3806)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub unsafe fn _mm256_maskz_mov_epi64(k: __mmask8, a: __m256i) -> __m256i {
    let mov: i64x4 = a.as_i64x4();
    let zero: i64x4 = _mm256_setzero_si256().as_i64x4();
    transmute(simd_select_bitmask(k, mov, zero))
}

/// Move packed 64-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_epi64&expand=3803)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub unsafe fn _mm_mask_mov_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    let mov: i64x2 = a.as_i64x2();
    transmute(simd_select_bitmask(k, mov, src.as_i64x2()))
}

/// Move packed 64-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_epi64&expand=3804)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub unsafe fn _mm_maskz_mov_epi64(k: __mmask8, a: __m128i) -> __m128i {
    let mov: i64x2 = a.as_i64x2();
    let zero: i64x2 = _mm_setzero_si128().as_i64x2();
    transmute(simd_select_bitmask(k, mov, zero))
}

/// Move packed single-precision (32-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_ps&expand=3825)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub unsafe fn _mm512_mask_mov_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    let mov: f32x16 = a.as_f32x16();
    transmute(simd_select_bitmask(k, mov, src.as_f32x16()))
}

/// Move packed single-precision (32-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_ps&expand=3826)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub unsafe fn _mm512_maskz_mov_ps(k: __mmask16, a: __m512) -> __m512 {
    let mov: f32x16 = a.as_f32x16();
    let zero: f32x16 = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, mov, zero))
}

/// Move packed single-precision (32-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_ps&expand=3823)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub unsafe fn _mm256_mask_mov_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
    let mov: f32x8 = a.as_f32x8();
    transmute(simd_select_bitmask(k, mov, src.as_f32x8()))
}

/// Move packed single-precision (32-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_ps&expand=3824)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub unsafe fn _mm256_maskz_mov_ps(k: __mmask8, a: __m256) -> __m256 {
    let mov: f32x8 = a.as_f32x8();
    let zero: f32x8 = _mm256_setzero_ps().as_f32x8();
    transmute(simd_select_bitmask(k, mov, zero))
}

/// Move packed single-precision (32-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_ps&expand=3821)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub unsafe fn _mm_mask_mov_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    let mov: f32x4 = a.as_f32x4();
    transmute(simd_select_bitmask(k, mov, src.as_f32x4()))
}

/// Move packed single-precision (32-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_ps&expand=3822)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub unsafe fn _mm_maskz_mov_ps(k: __mmask8, a: __m128) -> __m128 {
    let mov: f32x4 = a.as_f32x4();
    let zero: f32x4 = _mm_setzero_ps().as_f32x4();
    transmute(simd_select_bitmask(k, mov, zero))
}

/// Move packed double-precision (64-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_pd&expand=3819)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub unsafe fn _mm512_mask_mov_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    let mov: f64x8 = a.as_f64x8();
    transmute(simd_select_bitmask(k, mov, src.as_f64x8()))
}

/// Move packed double-precision (64-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_pd&expand=3820)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub unsafe fn _mm512_maskz_mov_pd(k: __mmask8, a: __m512d) -> __m512d {
    let mov: f64x8 = a.as_f64x8();
    let zero: f64x8 = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, mov, zero))
}

/// Move packed double-precision (64-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_pd&expand=3817)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub unsafe fn _mm256_mask_mov_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
    let mov: f64x4 = a.as_f64x4();
    transmute(simd_select_bitmask(k, mov, src.as_f64x4()))
}

/// Move packed double-precision (64-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_pd&expand=3818)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub unsafe fn _mm256_maskz_mov_pd(k: __mmask8, a: __m256d) -> __m256d {
    let mov: f64x4 = a.as_f64x4();
    let zero: f64x4 = _mm256_setzero_pd().as_f64x4();
    transmute(simd_select_bitmask(k, mov, zero))
}

/// Move packed double-precision (64-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_pd&expand=3815)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub unsafe fn _mm_mask_mov_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
    let mov: f64x2 = a.as_f64x2();
    transmute(simd_select_bitmask(k, mov, src.as_f64x2()))
}

/// Move packed double-precision (64-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_pd&expand=3816)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub unsafe fn _mm_maskz_mov_pd(k: __mmask8, a: __m128d) -> __m128d {
    let mov: f64x2 = a.as_f64x2();
    let zero: f64x2 = _mm_setzero_pd().as_f64x2();
    transmute(simd_select_bitmask(k, mov, zero))
}

/// Add packed 32-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_epi32&expand=100)
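///
/// # Example
///
/// Illustrative sketch (assumes `avx512f` and the unstable
/// `stdarch_x86_avx512` feature; not compiled as a doctest). Lane-wise
/// addition wraps on overflow:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi32(10);
///     let b = _mm512_set1_epi32(32);
///     // Every lane of `r` now holds 42.
///     let r = _mm512_add_epi32(a, b);
/// }
/// ```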
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub unsafe fn _mm512_add_epi32(a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_add(a.as_i32x16(), b.as_i32x16()))
}

/// Add packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_epi32&expand=101)
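///
/// # Example
///
/// Illustrative sketch of the writemask behavior (assumes `avx512f` and the
/// unstable `stdarch_x86_avx512` feature; not compiled as a doctest):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi32(10);
///     let b = _mm512_set1_epi32(32);
///     let src = _mm512_setzero_si512();
///     // Lanes 0..4 (mask bits set) hold 10 + 32 = 42; lanes 4..16 keep 0 from `src`.
///     let r = _mm512_mask_add_epi32(src, 0b00000000_00001111, a, b);
/// }
/// ```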
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub unsafe fn _mm512_mask_add_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    let add: i32x16 = _mm512_add_epi32(a, b).as_i32x16();
    transmute(simd_select_bitmask(k, add, src.as_i32x16()))
}

/// Add packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_epi32&expand=102)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub unsafe fn _mm512_maskz_add_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    let add: i32x16 = _mm512_add_epi32(a, b).as_i32x16();
    let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, add, zero))
}

/// Add packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_epi32&expand=98)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub unsafe fn _mm256_mask_add_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    let add: i32x8 = _mm256_add_epi32(a, b).as_i32x8();
    transmute(simd_select_bitmask(k, add, src.as_i32x8()))
}

/// Add packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_epi32&expand=99)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub unsafe fn _mm256_maskz_add_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    let add: i32x8 = _mm256_add_epi32(a, b).as_i32x8();
    let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
    transmute(simd_select_bitmask(k, add, zero))
}

/// Add packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_epi32&expand=95)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub unsafe fn _mm_mask_add_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let add: i32x4 = _mm_add_epi32(a, b).as_i32x4();
    transmute(simd_select_bitmask(k, add, src.as_i32x4()))
}

/// Add packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_epi32&expand=96)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub unsafe fn _mm_maskz_add_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let add: i32x4 = _mm_add_epi32(a, b).as_i32x4();
    let zero: i32x4 = _mm_setzero_si128().as_i32x4();
    transmute(simd_select_bitmask(k, add, zero))
}

/// Add packed 64-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_epi64&expand=109)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub unsafe fn _mm512_add_epi64(a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_add(a.as_i64x8(), b.as_i64x8()))
}

/// Add packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_epi64&expand=110)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub unsafe fn _mm512_mask_add_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let add: i64x8 = _mm512_add_epi64(a, b).as_i64x8();
    transmute(simd_select_bitmask(k, add, src.as_i64x8()))
}

/// Add packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_epi64&expand=111)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub unsafe fn _mm512_maskz_add_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let add: i64x8 = _mm512_add_epi64(a, b).as_i64x8();
    let zero: i64x8 = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, add, zero))
}

/// Add packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_epi64&expand=107)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub unsafe fn _mm256_mask_add_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    let add: i64x4 = _mm256_add_epi64(a, b).as_i64x4();
    transmute(simd_select_bitmask(k, add, src.as_i64x4()))
}

/// Add packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_epi64&expand=108)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub unsafe fn _mm256_maskz_add_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    let add: i64x4 = _mm256_add_epi64(a, b).as_i64x4();
    let zero: i64x4 = _mm256_setzero_si256().as_i64x4();
    transmute(simd_select_bitmask(k, add, zero))
}

/// Add packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_epi64&expand=104)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub unsafe fn _mm_mask_add_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let add: i64x2 = _mm_add_epi64(a, b).as_i64x2();
    transmute(simd_select_bitmask(k, add, src.as_i64x2()))
}

/// Add packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_epi64&expand=105)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub unsafe fn _mm_maskz_add_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let add: i64x2 = _mm_add_epi64(a, b).as_i64x2();
    let zero: i64x2 = _mm_setzero_si128().as_i64x2();
    transmute(simd_select_bitmask(k, add, zero))
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_ps&expand=139)
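///
/// # Example
///
/// Illustrative sketch (assumes `avx512f` and the unstable
/// `stdarch_x86_avx512` feature; not compiled as a doctest):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_ps(1.5);
///     let b = _mm512_set1_ps(2.0);
///     // Every lane of `r` now holds 3.5.
///     let r = _mm512_add_ps(a, b);
/// }
/// ```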
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddps))]
pub unsafe fn _mm512_add_ps(a: __m512, b: __m512) -> __m512 {
    transmute(simd_add(a.as_f32x16(), b.as_f32x16()))
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_ps&expand=140)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddps))]
pub unsafe fn _mm512_mask_add_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    let add: f32x16 = _mm512_add_ps(a, b).as_f32x16();
    transmute(simd_select_bitmask(k, add, src.as_f32x16()))
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_ps&expand=141)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddps))]
pub unsafe fn _mm512_maskz_add_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    let add: f32x16 = _mm512_add_ps(a, b).as_f32x16();
    let zero: f32x16 = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, add, zero))
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_ps&expand=137)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddps))]
pub unsafe fn _mm256_mask_add_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
    let add: f32x8 = _mm256_add_ps(a, b).as_f32x8();
    transmute(simd_select_bitmask(k, add, src.as_f32x8()))
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_ps&expand=138)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddps))]
pub unsafe fn _mm256_maskz_add_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    let add: f32x8 = _mm256_add_ps(a, b).as_f32x8();
    let zero: f32x8 = _mm256_setzero_ps().as_f32x8();
    transmute(simd_select_bitmask(k, add, zero))
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_ps&expand=134)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddps))]
pub unsafe fn _mm_mask_add_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    let add: f32x4 = _mm_add_ps(a, b).as_f32x4();
    transmute(simd_select_bitmask(k, add, src.as_f32x4()))
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_ps&expand=135)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddps))]
pub unsafe fn _mm_maskz_add_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    let add: f32x4 = _mm_add_ps(a, b).as_f32x4();
    let zero: f32x4 = _mm_setzero_ps().as_f32x4();
    transmute(simd_select_bitmask(k, add, zero))
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_pd&expand=127)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub unsafe fn _mm512_add_pd(a: __m512d, b: __m512d) -> __m512d {
    transmute(simd_add(a.as_f64x8(), b.as_f64x8()))
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_pd&expand=128)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub unsafe fn _mm512_mask_add_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    let add: f64x8 = _mm512_add_pd(a, b).as_f64x8();
    transmute(simd_select_bitmask(k, add, src.as_f64x8()))
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_pd&expand=129)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub unsafe fn _mm512_maskz_add_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    let add: f64x8 = _mm512_add_pd(a, b).as_f64x8();
    let zero: f64x8 = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, add, zero))
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_pd&expand=125)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub unsafe fn _mm256_mask_add_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    let add: f64x4 = _mm256_add_pd(a, b).as_f64x4();
    transmute(simd_select_bitmask(k, add, src.as_f64x4()))
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_pd&expand=126)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub unsafe fn _mm256_maskz_add_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    let add: f64x4 = _mm256_add_pd(a, b).as_f64x4();
    let zero: f64x4 = _mm256_setzero_pd().as_f64x4();
    transmute(simd_select_bitmask(k, add, zero))
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_pd&expand=122)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub unsafe fn _mm_mask_add_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    let add: f64x2 = _mm_add_pd(a, b).as_f64x2();
    transmute(simd_select_bitmask(k, add, src.as_f64x2()))
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_pd&expand=123)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub unsafe fn _mm_maskz_add_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    let add: f64x2 = _mm_add_pd(a, b).as_f64x2();
    let zero: f64x2 = _mm_setzero_pd().as_f64x2();
    transmute(simd_select_bitmask(k, add, zero))
}

/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_epi32&expand=5694)
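///
/// # Example
///
/// Illustrative sketch (assumes `avx512f` and the unstable
/// `stdarch_x86_avx512` feature; not compiled as a doctest). Note the operand
/// order: `b` is subtracted from `a`:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi32(10);
///     let b = _mm512_set1_epi32(3);
///     // Every lane of `r` now holds 10 - 3 = 7.
///     let r = _mm512_sub_epi32(a, b);
/// }
/// ```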
918#[inline]
919#[target_feature(enable = "avx512f")]
920#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
921#[cfg_attr(test, assert_instr(vpsubd))]
922pub unsafe fn _mm512_sub_epi32(a: __m512i, b: __m512i) -> __m512i {
923 transmute(src:simd_sub(lhs:a.as_i32x16(), rhs:b.as_i32x16()))
924}
925
926/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
927///
928/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_epi32&expand=5692)
929#[inline]
930#[target_feature(enable = "avx512f")]
931#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
932#[cfg_attr(test, assert_instr(vpsubd))]
933pub unsafe fn _mm512_mask_sub_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
934 let sub: i32x16 = _mm512_sub_epi32(a, b).as_i32x16();
935 transmute(src:simd_select_bitmask(m:k, yes:sub, no:src.as_i32x16()))
936}
937
938/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
939///
940/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_epi32&expand=5693)
941#[inline]
942#[target_feature(enable = "avx512f")]
943#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
944#[cfg_attr(test, assert_instr(vpsubd))]
945pub unsafe fn _mm512_maskz_sub_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
946 let sub: i32x16 = _mm512_sub_epi32(a, b).as_i32x16();
947 let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
948 transmute(src:simd_select_bitmask(m:k, yes:sub, no:zero))
949}
950
951/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
952///
953/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_epi32&expand=5689)
954#[inline]
955#[target_feature(enable = "avx512f,avx512vl")]
956#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
957#[cfg_attr(test, assert_instr(vpsubd))]
958pub unsafe fn _mm256_mask_sub_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
959 let sub: i32x8 = _mm256_sub_epi32(a, b).as_i32x8();
960 transmute(src:simd_select_bitmask(m:k, yes:sub, no:src.as_i32x8()))
961}
962
963/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
964///
965/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_epi32&expand=5690)
966#[inline]
967#[target_feature(enable = "avx512f,avx512vl")]
968#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
969#[cfg_attr(test, assert_instr(vpsubd))]
970pub unsafe fn _mm256_maskz_sub_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
971 let sub: i32x8 = _mm256_sub_epi32(a, b).as_i32x8();
972 let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
973 transmute(src:simd_select_bitmask(m:k, yes:sub, no:zero))
974}
975
976/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
977///
978/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_epi32&expand=5686)
979#[inline]
980#[target_feature(enable = "avx512f,avx512vl")]
981#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
982#[cfg_attr(test, assert_instr(vpsubd))]
983pub unsafe fn _mm_mask_sub_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
984 let sub: i32x4 = _mm_sub_epi32(a, b).as_i32x4();
985 transmute(src:simd_select_bitmask(m:k, yes:sub, no:src.as_i32x4()))
986}
987
988/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
989///
990/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_epi32&expand=5687)
991#[inline]
992#[target_feature(enable = "avx512f,avx512vl")]
993#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
994#[cfg_attr(test, assert_instr(vpsubd))]
995pub unsafe fn _mm_maskz_sub_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
996 let sub: i32x4 = _mm_sub_epi32(a, b).as_i32x4();
997 let zero: i32x4 = _mm_setzero_si128().as_i32x4();
998 transmute(src:simd_select_bitmask(m:k, yes:sub, no:zero))
999}
1000
1001/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst.
1002///
1003/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_epi64&expand=5703)
1004#[inline]
1005#[target_feature(enable = "avx512f")]
1006#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1007#[cfg_attr(test, assert_instr(vpsubq))]
1008pub unsafe fn _mm512_sub_epi64(a: __m512i, b: __m512i) -> __m512i {
1009 transmute(src:simd_sub(lhs:a.as_i64x8(), rhs:b.as_i64x8()))
1010}
1011
1012/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1013///
1014/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_epi64&expand=5701)
1015#[inline]
1016#[target_feature(enable = "avx512f")]
1017#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1018#[cfg_attr(test, assert_instr(vpsubq))]
1019pub unsafe fn _mm512_mask_sub_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1020 let sub: i64x8 = _mm512_sub_epi64(a, b).as_i64x8();
1021 transmute(simd_select_bitmask(k, sub, src.as_i64x8()))
1022}
1023
1024/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1025///
1026/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_epi64&expand=5702)
1027#[inline]
1028#[target_feature(enable = "avx512f")]
1029#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1030#[cfg_attr(test, assert_instr(vpsubq))]
1031pub unsafe fn _mm512_maskz_sub_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1032 let sub: i64x8 = _mm512_sub_epi64(a, b).as_i64x8();
1033 let zero: i64x8 = _mm512_setzero_si512().as_i64x8();
1034 transmute(simd_select_bitmask(k, sub, zero))
1035}
1036
1037/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1038///
1039/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_epi64&expand=5698)
1040#[inline]
1041#[target_feature(enable = "avx512f,avx512vl")]
1042#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1043#[cfg_attr(test, assert_instr(vpsubq))]
1044pub unsafe fn _mm256_mask_sub_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1045 let sub: i64x4 = _mm256_sub_epi64(a, b).as_i64x4();
1046 transmute(simd_select_bitmask(k, sub, src.as_i64x4()))
1047}
1048
1049/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1050///
1051/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_epi64&expand=5699)
1052#[inline]
1053#[target_feature(enable = "avx512f,avx512vl")]
1054#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1055#[cfg_attr(test, assert_instr(vpsubq))]
1056pub unsafe fn _mm256_maskz_sub_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1057 let sub: i64x4 = _mm256_sub_epi64(a, b).as_i64x4();
1058 let zero: i64x4 = _mm256_setzero_si256().as_i64x4();
1059 transmute(simd_select_bitmask(k, sub, zero))
1060}
1061
1062/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1063///
1064/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_epi64&expand=5695)
1065#[inline]
1066#[target_feature(enable = "avx512f,avx512vl")]
1067#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1068#[cfg_attr(test, assert_instr(vpsubq))]
1069pub unsafe fn _mm_mask_sub_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1070 let sub: i64x2 = _mm_sub_epi64(a, b).as_i64x2();
1071 transmute(simd_select_bitmask(k, sub, src.as_i64x2()))
1072}
1073
1074/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1075///
1076/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_epi64&expand=5696)
1077#[inline]
1078#[target_feature(enable = "avx512f,avx512vl")]
1079#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1080#[cfg_attr(test, assert_instr(vpsubq))]
1081pub unsafe fn _mm_maskz_sub_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1082 let sub: i64x2 = _mm_sub_epi64(a, b).as_i64x2();
1083 let zero: i64x2 = _mm_setzero_si128().as_i64x2();
1084 transmute(simd_select_bitmask(k, sub, zero))
1085}
1086
1087/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst.
1088///
1089/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_ps&expand=5733)
1090#[inline]
1091#[target_feature(enable = "avx512f")]
1092#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1093#[cfg_attr(test, assert_instr(vsubps))]
1094pub unsafe fn _mm512_sub_ps(a: __m512, b: __m512) -> __m512 {
1095 transmute(simd_sub(a.as_f32x16(), b.as_f32x16()))
1096}
1097
1098/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1099///
1100/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_ps&expand=5731)
1101#[inline]
1102#[target_feature(enable = "avx512f")]
1103#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1104#[cfg_attr(test, assert_instr(vsubps))]
1105pub unsafe fn _mm512_mask_sub_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
1106 let sub: f32x16 = _mm512_sub_ps(a, b).as_f32x16();
1107 transmute(simd_select_bitmask(k, sub, src.as_f32x16()))
1108}
1109
1110/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1111///
1112/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_ps&expand=5732)
1113#[inline]
1114#[target_feature(enable = "avx512f")]
1115#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1116#[cfg_attr(test, assert_instr(vsubps))]
1117pub unsafe fn _mm512_maskz_sub_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
1118 let sub: f32x16 = _mm512_sub_ps(a, b).as_f32x16();
1119 let zero: f32x16 = _mm512_setzero_ps().as_f32x16();
1120 transmute(simd_select_bitmask(k, sub, zero))
1121}
1122
1123/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1124///
1125/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_ps&expand=5728)
1126#[inline]
1127#[target_feature(enable = "avx512f,avx512vl")]
1128#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1129#[cfg_attr(test, assert_instr(vsubps))]
1130pub unsafe fn _mm256_mask_sub_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
1131 let sub: f32x8 = _mm256_sub_ps(a, b).as_f32x8();
1132 transmute(simd_select_bitmask(k, sub, src.as_f32x8()))
1133}
1134
1135/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1136///
1137/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_ps&expand=5729)
1138#[inline]
1139#[target_feature(enable = "avx512f,avx512vl")]
1140#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1141#[cfg_attr(test, assert_instr(vsubps))]
1142pub unsafe fn _mm256_maskz_sub_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
1143 let sub: f32x8 = _mm256_sub_ps(a, b).as_f32x8();
1144 let zero: f32x8 = _mm256_setzero_ps().as_f32x8();
1145 transmute(simd_select_bitmask(k, sub, zero))
1146}
1147
1148/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1149///
1150/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_ps&expand=5725)
1151#[inline]
1152#[target_feature(enable = "avx512f,avx512vl")]
1153#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1154#[cfg_attr(test, assert_instr(vsubps))]
1155pub unsafe fn _mm_mask_sub_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
1156 let sub: f32x4 = _mm_sub_ps(a, b).as_f32x4();
1157 transmute(simd_select_bitmask(k, sub, src.as_f32x4()))
1158}
1159
1160/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1161///
1162/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_ps&expand=5726)
1163#[inline]
1164#[target_feature(enable = "avx512f,avx512vl")]
1165#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1166#[cfg_attr(test, assert_instr(vsubps))]
1167pub unsafe fn _mm_maskz_sub_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
1168 let sub: f32x4 = _mm_sub_ps(a, b).as_f32x4();
1169 let zero: f32x4 = _mm_setzero_ps().as_f32x4();
1170 transmute(simd_select_bitmask(k, sub, zero))
1171}
1172
1173/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst.
1174///
1175/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_pd&expand=5721)
1176#[inline]
1177#[target_feature(enable = "avx512f")]
1178#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1179#[cfg_attr(test, assert_instr(vsubpd))]
1180pub unsafe fn _mm512_sub_pd(a: __m512d, b: __m512d) -> __m512d {
1181 transmute(simd_sub(a.as_f64x8(), b.as_f64x8()))
1182}
1183
1184/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1185///
1186/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_pd&expand=5719)
1187#[inline]
1188#[target_feature(enable = "avx512f")]
1189#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1190#[cfg_attr(test, assert_instr(vsubpd))]
1191pub unsafe fn _mm512_mask_sub_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
1192 let sub: f64x8 = _mm512_sub_pd(a, b).as_f64x8();
1193 transmute(simd_select_bitmask(k, sub, src.as_f64x8()))
1194}
1195
1196/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1197///
1198/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_pd&expand=5720)
1199#[inline]
1200#[target_feature(enable = "avx512f")]
1201#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1202#[cfg_attr(test, assert_instr(vsubpd))]
1203pub unsafe fn _mm512_maskz_sub_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
1204 let sub: f64x8 = _mm512_sub_pd(a, b).as_f64x8();
1205 let zero: f64x8 = _mm512_setzero_pd().as_f64x8();
1206 transmute(simd_select_bitmask(k, sub, zero))
1207}
1208
1209/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1210///
1211/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_pd&expand=5716)
1212#[inline]
1213#[target_feature(enable = "avx512f,avx512vl")]
1214#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1215#[cfg_attr(test, assert_instr(vsubpd))]
1216pub unsafe fn _mm256_mask_sub_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
1217 let sub: f64x4 = _mm256_sub_pd(a, b).as_f64x4();
1218 transmute(simd_select_bitmask(k, sub, src.as_f64x4()))
1219}
1220
1221/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1222///
1223/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_pd&expand=5717)
1224#[inline]
1225#[target_feature(enable = "avx512f,avx512vl")]
1226#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1227#[cfg_attr(test, assert_instr(vsubpd))]
1228pub unsafe fn _mm256_maskz_sub_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
1229 let sub: f64x4 = _mm256_sub_pd(a, b).as_f64x4();
1230 let zero: f64x4 = _mm256_setzero_pd().as_f64x4();
1231 transmute(simd_select_bitmask(k, sub, zero))
1232}
1233
1234/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1235///
1236/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_pd&expand=5713)
1237#[inline]
1238#[target_feature(enable = "avx512f,avx512vl")]
1239#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1240#[cfg_attr(test, assert_instr(vsubpd))]
1241pub unsafe fn _mm_mask_sub_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
1242 let sub: f64x2 = _mm_sub_pd(a, b).as_f64x2();
1243 transmute(simd_select_bitmask(k, sub, src.as_f64x2()))
1244}
1245
1246/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1247///
1248/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_pd&expand=5714)
1249#[inline]
1250#[target_feature(enable = "avx512f,avx512vl")]
1251#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1252#[cfg_attr(test, assert_instr(vsubpd))]
1253pub unsafe fn _mm_maskz_sub_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
1254 let sub: f64x2 = _mm_sub_pd(a, b).as_f64x2();
1255 let zero: f64x2 = _mm_setzero_pd().as_f64x2();
1256 transmute(simd_select_bitmask(k, sub, zero))
1257}
1258
1259/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst.
1260///
1261/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_epi32&expand=3907)
1262#[inline]
1263#[target_feature(enable = "avx512f")]
1264#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1265#[cfg_attr(test, assert_instr(vpmuldq))]
1266pub unsafe fn _mm512_mul_epi32(a: __m512i, b: __m512i) -> __m512i {
1267 transmute(vpmuldq(a.as_i32x16(), b.as_i32x16()))
1268}
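
// Illustrative sketch (assumes AVX-512F is available at runtime; values described per
// 64-bit lane). `_mm512_mul_epi32` reads only the low 32 bits of each 64-bit lane,
// sign-extends them, and keeps the full 64-bit product, so 16 input `i32` values yield
// 8 `i64` results.
//
//     let a = _mm512_set1_epi64(0xFFFF_FFFF_FFFF_FFFEu64 as i64); // low 32 bits = -2
//     let b = _mm512_set1_epi64(3);                               // low 32 bits = 3
//     let prod = _mm512_mul_epi32(a, b); // every 64-bit lane holds -6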
1269
1270/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1271///
1272/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_epi32&expand=3905)
1273#[inline]
1274#[target_feature(enable = "avx512f")]
1275#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1276#[cfg_attr(test, assert_instr(vpmuldq))]
1277pub unsafe fn _mm512_mask_mul_epi32(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1278 let mul: i64x8 = _mm512_mul_epi32(a, b).as_i64x8();
1279 transmute(simd_select_bitmask(k, mul, src.as_i64x8()))
1280}
1281
1282/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1283///
1284/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_epi32&expand=3906)
1285#[inline]
1286#[target_feature(enable = "avx512f")]
1287#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1288#[cfg_attr(test, assert_instr(vpmuldq))]
1289pub unsafe fn _mm512_maskz_mul_epi32(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1290 let mul: i64x8 = _mm512_mul_epi32(a, b).as_i64x8();
1291 let zero: i64x8 = _mm512_setzero_si512().as_i64x8();
1292 transmute(simd_select_bitmask(k, mul, zero))
1293}
1294
1295/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1296///
1297/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_epi32&expand=3902)
1298#[inline]
1299#[target_feature(enable = "avx512f,avx512vl")]
1300#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1301#[cfg_attr(test, assert_instr(vpmuldq))]
1302pub unsafe fn _mm256_mask_mul_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1303 let mul: i64x4 = _mm256_mul_epi32(a, b).as_i64x4();
1304 transmute(simd_select_bitmask(k, mul, src.as_i64x4()))
1305}
1306
1307/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1308///
1309/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_epi32&expand=3903)
1310#[inline]
1311#[target_feature(enable = "avx512f,avx512vl")]
1312#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1313#[cfg_attr(test, assert_instr(vpmuldq))]
1314pub unsafe fn _mm256_maskz_mul_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1315 let mul: i64x4 = _mm256_mul_epi32(a, b).as_i64x4();
1316 let zero: i64x4 = _mm256_setzero_si256().as_i64x4();
1317 transmute(simd_select_bitmask(k, mul, zero))
1318}
1319
1320/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1321///
1322/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_epi32&expand=3899)
1323#[inline]
1324#[target_feature(enable = "avx512f,avx512vl")]
1325#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1326#[cfg_attr(test, assert_instr(vpmuldq))]
1327pub unsafe fn _mm_mask_mul_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1328 let mul: i64x2 = _mm_mul_epi32(a, b).as_i64x2();
1329 transmute(simd_select_bitmask(k, mul, src.as_i64x2()))
1330}
1331
1332/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1333///
1334/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_epi32&expand=3900)
1335#[inline]
1336#[target_feature(enable = "avx512f,avx512vl")]
1337#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1338#[cfg_attr(test, assert_instr(vpmuldq))]
1339pub unsafe fn _mm_maskz_mul_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1340 let mul: i64x2 = _mm_mul_epi32(a, b).as_i64x2();
1341 let zero: i64x2 = _mm_setzero_si128().as_i64x2();
1342 transmute(simd_select_bitmask(k, mul, zero))
1343}
1344
1345/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst.
1346///
1347/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mullo_epi&expand=4005)
1348#[inline]
1349#[target_feature(enable = "avx512f")]
1350#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1351#[cfg_attr(test, assert_instr(vpmulld))]
1352pub unsafe fn _mm512_mullo_epi32(a: __m512i, b: __m512i) -> __m512i {
1353 transmute(simd_mul(a.as_i32x16(), b.as_i32x16()))
1354}
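
// Illustrative sketch (assumes AVX-512F): `_mm512_mullo_epi32` keeps only the low 32 bits
// of each intermediate product, i.e. it behaves like a lane-wise `i32::wrapping_mul`.
//
//     let a = _mm512_set1_epi32(0x4000_0000); // 2^30
//     let b = _mm512_set1_epi32(4);
//     let lo = _mm512_mullo_epi32(a, b); // every lane is 0: 2^32 truncated to 32 bits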
1355
1356/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1357///
1358/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mullo_epi32&expand=4003)
1359#[inline]
1360#[target_feature(enable = "avx512f")]
1361#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1362#[cfg_attr(test, assert_instr(vpmulld))]
1363pub unsafe fn _mm512_mask_mullo_epi32(
1364 src: __m512i,
1365 k: __mmask16,
1366 a: __m512i,
1367 b: __m512i,
1368) -> __m512i {
1369 let mul: i32x16 = _mm512_mullo_epi32(a, b).as_i32x16();
1370 transmute(simd_select_bitmask(k, mul, src.as_i32x16()))
1371}
1372
1373/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1374///
1375/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mullo_epi32&expand=4004)
1376#[inline]
1377#[target_feature(enable = "avx512f")]
1378#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1379#[cfg_attr(test, assert_instr(vpmulld))]
1380pub unsafe fn _mm512_maskz_mullo_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
1381 let mul: i32x16 = _mm512_mullo_epi32(a, b).as_i32x16();
1382 let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
1383 transmute(simd_select_bitmask(k, mul, zero))
1384}
1385
1386/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1387///
1388/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mullo_epi32&expand=4000)
1389#[inline]
1390#[target_feature(enable = "avx512f,avx512vl")]
1391#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1392#[cfg_attr(test, assert_instr(vpmulld))]
1393pub unsafe fn _mm256_mask_mullo_epi32(
1394 src: __m256i,
1395 k: __mmask8,
1396 a: __m256i,
1397 b: __m256i,
1398) -> __m256i {
1399 let mul: i32x8 = _mm256_mullo_epi32(a, b).as_i32x8();
1400 transmute(simd_select_bitmask(k, mul, src.as_i32x8()))
1401}
1402
1403/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1404///
1405/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mullo_epi32&expand=4001)
1406#[inline]
1407#[target_feature(enable = "avx512f,avx512vl")]
1408#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1409#[cfg_attr(test, assert_instr(vpmulld))]
1410pub unsafe fn _mm256_maskz_mullo_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1411 let mul: i32x8 = _mm256_mullo_epi32(a, b).as_i32x8();
1412 let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
1413 transmute(simd_select_bitmask(k, mul, zero))
1414}
1415
1416/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1417///
1418/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mullo_epi32&expand=3997)
1419#[inline]
1420#[target_feature(enable = "avx512f,avx512vl")]
1421#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1422#[cfg_attr(test, assert_instr(vpmulld))]
1423pub unsafe fn _mm_mask_mullo_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1424 let mul: i32x4 = _mm_mullo_epi32(a, b).as_i32x4();
1425 transmute(simd_select_bitmask(k, mul, src.as_i32x4()))
1426}
1427
1428/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1429///
1430/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mullo_epi32&expand=3998)
1431#[inline]
1432#[target_feature(enable = "avx512f,avx512vl")]
1433#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1434#[cfg_attr(test, assert_instr(vpmulld))]
1435pub unsafe fn _mm_maskz_mullo_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1436 let mul: i32x4 = _mm_mullo_epi32(a, b).as_i32x4();
1437 let zero: i32x4 = _mm_setzero_si128().as_i32x4();
1438 transmute(simd_select_bitmask(k, mul, zero))
1439}
1440
1441/// Multiplies elements in packed 64-bit integer vectors a and b together, storing the lower 64 bits of the result in dst.
1442///
1443/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=512_mullox_epi64&expand=4017)
1444///
1445/// This intrinsic generates a sequence of instructions, which may perform worse than a native instruction. Consider the performance impact of this intrinsic.
1446#[inline]
1447#[target_feature(enable = "avx512f")]
1448#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1449pub unsafe fn _mm512_mullox_epi64(a: __m512i, b: __m512i) -> __m512i {
1450 transmute(simd_mul(a.as_i64x8(), b.as_i64x8()))
1451}
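
// Illustrative note (a sketch, not an additional API): `_mm512_mullox_epi64` is a
// lane-wise wrapping 64-bit multiply, so each result lane equals
// `a_lane.wrapping_mul(b_lane)`. AVX-512F has no single instruction for a full 64-bit
// low multiply, which is why the `simd_mul` above may lower to an instruction sequence.
//
//     let a = _mm512_set1_epi64(1 << 33);
//     let b = _mm512_set1_epi64(1 << 31);
//     let lo = _mm512_mullox_epi64(a, b); // 2^64 wraps to 0 in every lane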
1452
1453/// Multiplies elements in packed 64-bit integer vectors a and b together, storing the lower 64 bits of the result in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1454///
1455/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=512_mask_mullox&expand=4016)
1456///
1457/// This intrinsic generates a sequence of instructions, which may perform worse than a native instruction. Consider the performance impact of this intrinsic.
1458#[inline]
1459#[target_feature(enable = "avx512f")]
1460#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1461pub unsafe fn _mm512_mask_mullox_epi64(
1462 src: __m512i,
1463 k: __mmask8,
1464 a: __m512i,
1465 b: __m512i,
1466) -> __m512i {
1467 let mul: i64x8 = _mm512_mullox_epi64(a, b).as_i64x8();
1468 transmute(simd_select_bitmask(k, mul, src.as_i64x8()))
1469}
1470
1471/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst.
1472///
1473/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=512_mul_epu32&expand=3916)
1474#[inline]
1475#[target_feature(enable = "avx512f")]
1476#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1477#[cfg_attr(test, assert_instr(vpmuludq))]
1478pub unsafe fn _mm512_mul_epu32(a: __m512i, b: __m512i) -> __m512i {
1479 transmute(vpmuludq(a.as_u32x16(), b.as_u32x16()))
1480}
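
// Illustrative sketch (assumes AVX-512F): the unsigned counterpart of `_mm512_mul_epi32`.
// The low 32 bits of each 64-bit lane are zero-extended, and the full 64-bit product is
// kept.
//
//     let a = _mm512_set1_epi64(0xFFFF_FFFF); // low 32 bits = u32::MAX
//     let b = _mm512_set1_epi64(2);
//     let prod = _mm512_mul_epu32(a, b); // every lane is 0x1_FFFF_FFFE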
1481
1482/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1483///
1484/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=512_mask_mul_epu32&expand=3914)
1485#[inline]
1486#[target_feature(enable = "avx512f")]
1487#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1488#[cfg_attr(test, assert_instr(vpmuludq))]
1489pub unsafe fn _mm512_mask_mul_epu32(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1490 let mul: u64x8 = _mm512_mul_epu32(a, b).as_u64x8();
1491 transmute(simd_select_bitmask(k, mul, src.as_u64x8()))
1492}
1493
1494/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1495///
1496/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=512_maskz_mul_epu32&expand=3915)
1497#[inline]
1498#[target_feature(enable = "avx512f")]
1499#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1500#[cfg_attr(test, assert_instr(vpmuludq))]
1501pub unsafe fn _mm512_maskz_mul_epu32(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1502 let mul: u64x8 = _mm512_mul_epu32(a, b).as_u64x8();
1503 let zero: u64x8 = _mm512_setzero_si512().as_u64x8();
1504 transmute(simd_select_bitmask(k, mul, zero))
1505}
1506
1507/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1508///
1509/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_epu32&expand=3911)
1510#[inline]
1511#[target_feature(enable = "avx512f,avx512vl")]
1512#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1513#[cfg_attr(test, assert_instr(vpmuludq))]
1514pub unsafe fn _mm256_mask_mul_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1515 let mul: u64x4 = _mm256_mul_epu32(a, b).as_u64x4();
1516 transmute(simd_select_bitmask(k, mul, src.as_u64x4()))
1517}
1518
1519/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1520///
1521/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_epu32&expand=3912)
1522#[inline]
1523#[target_feature(enable = "avx512f,avx512vl")]
1524#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1525#[cfg_attr(test, assert_instr(vpmuludq))]
1526pub unsafe fn _mm256_maskz_mul_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1527 let mul: u64x4 = _mm256_mul_epu32(a, b).as_u64x4();
1528 let zero: u64x4 = _mm256_setzero_si256().as_u64x4();
1529 transmute(simd_select_bitmask(k, mul, zero))
1530}
1531
1532/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1533///
1534/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_epu32&expand=3908)
1535#[inline]
1536#[target_feature(enable = "avx512f,avx512vl")]
1537#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1538#[cfg_attr(test, assert_instr(vpmuludq))]
1539pub unsafe fn _mm_mask_mul_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1540 let mul: u64x2 = _mm_mul_epu32(a, b).as_u64x2();
1541 transmute(simd_select_bitmask(k, mul, src.as_u64x2()))
1542}
1543
1544/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1545///
1546/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_epu32&expand=3909)
1547#[inline]
1548#[target_feature(enable = "avx512f,avx512vl")]
1549#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1550#[cfg_attr(test, assert_instr(vpmuludq))]
1551pub unsafe fn _mm_maskz_mul_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1552 let mul: u64x2 = _mm_mul_epu32(a, b).as_u64x2();
1553 let zero: u64x2 = _mm_setzero_si128().as_u64x2();
1554 transmute(simd_select_bitmask(k, mul, zero))
1555}
1556
1557/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
1558///
1559/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_ps&expand=3934)
1560#[inline]
1561#[target_feature(enable = "avx512f")]
1562#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1563#[cfg_attr(test, assert_instr(vmulps))]
1564pub unsafe fn _mm512_mul_ps(a: __m512, b: __m512) -> __m512 {
1565 transmute(simd_mul(a.as_f32x16(), b.as_f32x16()))
1566}
1567
1568/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1569///
1570/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_ps&expand=3932)
1571#[inline]
1572#[target_feature(enable = "avx512f")]
1573#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1574#[cfg_attr(test, assert_instr(vmulps))]
1575pub unsafe fn _mm512_mask_mul_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
1576 let mul: f32x16 = _mm512_mul_ps(a, b).as_f32x16();
1577 transmute(simd_select_bitmask(k, mul, src.as_f32x16()))
1578}
1579
1580/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1581///
1582/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_ps&expand=3933)
1583#[inline]
1584#[target_feature(enable = "avx512f")]
1585#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1586#[cfg_attr(test, assert_instr(vmulps))]
1587pub unsafe fn _mm512_maskz_mul_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
1588 let mul: f32x16 = _mm512_mul_ps(a, b).as_f32x16();
1589 let zero: f32x16 = _mm512_setzero_ps().as_f32x16();
1590 transmute(simd_select_bitmask(k, mul, zero))
1591}
1592
1593/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1594///
1595/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_ps&expand=3929)
1596#[inline]
1597#[target_feature(enable = "avx512f,avx512vl")]
1598#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1599#[cfg_attr(test, assert_instr(vmulps))]
1600pub unsafe fn _mm256_mask_mul_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
1601 let mul: f32x8 = _mm256_mul_ps(a, b).as_f32x8();
1602 transmute(simd_select_bitmask(k, mul, src.as_f32x8()))
1603}
1604
1605/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1606///
1607/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_ps&expand=3930)
1608#[inline]
1609#[target_feature(enable = "avx512f,avx512vl")]
1610#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1611#[cfg_attr(test, assert_instr(vmulps))]
1612pub unsafe fn _mm256_maskz_mul_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
1613 let mul: f32x8 = _mm256_mul_ps(a, b).as_f32x8();
1614 let zero: f32x8 = _mm256_setzero_ps().as_f32x8();
1615 transmute(simd_select_bitmask(k, mul, zero))
1616}
1617
1618/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1619///
1620/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_ps&expand=3926)
1621#[inline]
1622#[target_feature(enable = "avx512f,avx512vl")]
1623#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1624#[cfg_attr(test, assert_instr(vmulps))]
1625pub unsafe fn _mm_mask_mul_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
1626 let mul: f32x4 = _mm_mul_ps(a, b).as_f32x4();
1627 transmute(simd_select_bitmask(k, mul, src.as_f32x4()))
1628}
1629
1630/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1631///
1632/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_ps&expand=3927)
1633#[inline]
1634#[target_feature(enable = "avx512f,avx512vl")]
1635#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1636#[cfg_attr(test, assert_instr(vmulps))]
1637pub unsafe fn _mm_maskz_mul_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
1638 let mul: f32x4 = _mm_mul_ps(a, b).as_f32x4();
1639 let zero: f32x4 = _mm_setzero_ps().as_f32x4();
1640 transmute(simd_select_bitmask(k, mul, zero))
1641}
1642
1643/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
1644///
1645/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_pd&expand=3925)
1646#[inline]
1647#[target_feature(enable = "avx512f")]
1648#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1649#[cfg_attr(test, assert_instr(vmulpd))]
1650pub unsafe fn _mm512_mul_pd(a: __m512d, b: __m512d) -> __m512d {
1651 transmute(simd_mul(a.as_f64x8(), b.as_f64x8()))
1652}
1653
1654/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1655///
1656/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_pd&expand=3923)
1657#[inline]
1658#[target_feature(enable = "avx512f")]
1659#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1660#[cfg_attr(test, assert_instr(vmulpd))]
1661pub unsafe fn _mm512_mask_mul_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
1662 let mul: f64x8 = _mm512_mul_pd(a, b).as_f64x8();
1663 transmute(simd_select_bitmask(k, mul, src.as_f64x8()))
1664}
1665
1666/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1667///
1668/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_pd&expand=3924)
1669#[inline]
1670#[target_feature(enable = "avx512f")]
1671#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1672#[cfg_attr(test, assert_instr(vmulpd))]
1673pub unsafe fn _mm512_maskz_mul_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
1674 let mul: f64x8 = _mm512_mul_pd(a, b).as_f64x8();
1675 let zero: f64x8 = _mm512_setzero_pd().as_f64x8();
1676 transmute(simd_select_bitmask(k, mul, zero))
1677}
1678
1679/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1680///
1681/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_pd&expand=3920)
1682#[inline]
1683#[target_feature(enable = "avx512f,avx512vl")]
1684#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1685#[cfg_attr(test, assert_instr(vmulpd))]
1686pub unsafe fn _mm256_mask_mul_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
1687 let mul: f64x4 = _mm256_mul_pd(a, b).as_f64x4();
1688 transmute(simd_select_bitmask(k, mul, src.as_f64x4()))
1689}
1690
1691/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1692///
1693/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_pd&expand=3921)
1694#[inline]
1695#[target_feature(enable = "avx512f,avx512vl")]
1696#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1697#[cfg_attr(test, assert_instr(vmulpd))]
1698pub unsafe fn _mm256_maskz_mul_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
1699 let mul: f64x4 = _mm256_mul_pd(a, b).as_f64x4();
1700 let zero: f64x4 = _mm256_setzero_pd().as_f64x4();
1701 transmute(simd_select_bitmask(k, mul, zero))
1702}
1703
1704/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1705///
1706/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_pd&expand=3917)
1707#[inline]
1708#[target_feature(enable = "avx512f,avx512vl")]
1709#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1710#[cfg_attr(test, assert_instr(vmulpd))]
1711pub unsafe fn _mm_mask_mul_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
1712 let mul: f64x2 = _mm_mul_pd(a, b).as_f64x2();
1713 transmute(simd_select_bitmask(k, mul, src.as_f64x2()))
1714}
1715
1716/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1717///
1718/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_pd&expand=3918)
1719#[inline]
1720#[target_feature(enable = "avx512f,avx512vl")]
1721#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1722#[cfg_attr(test, assert_instr(vmulpd))]
1723pub unsafe fn _mm_maskz_mul_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
1724 let mul: f64x2 = _mm_mul_pd(a, b).as_f64x2();
1725 let zero: f64x2 = _mm_setzero_pd().as_f64x2();
1726 transmute(simd_select_bitmask(k, mul, zero))
1727}
1728
1729/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst.
1730///
1731/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_ps&expand=2162)
1732#[inline]
1733#[target_feature(enable = "avx512f")]
1734#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1735#[cfg_attr(test, assert_instr(vdivps))]
1736pub unsafe fn _mm512_div_ps(a: __m512, b: __m512) -> __m512 {
1737 transmute(simd_div(a.as_f32x16(), b.as_f32x16()))
1738}
1739
1740/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1741///
1742/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_ps&expand=2163)
1743#[inline]
1744#[target_feature(enable = "avx512f")]
1745#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1746#[cfg_attr(test, assert_instr(vdivps))]
1747pub unsafe fn _mm512_mask_div_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
1748 let div: f32x16 = _mm512_div_ps(a, b).as_f32x16();
1749 transmute(simd_select_bitmask(k, div, src.as_f32x16()))
1750}
1751
1752/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1753///
1754/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_ps&expand=2164)
1755#[inline]
1756#[target_feature(enable = "avx512f")]
1757#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1758#[cfg_attr(test, assert_instr(vdivps))]
1759pub unsafe fn _mm512_maskz_div_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
1760 let div: f32x16 = _mm512_div_ps(a, b).as_f32x16();
1761 let zero: f32x16 = _mm512_setzero_ps().as_f32x16();
1762 transmute(simd_select_bitmask(k, div, zero))
1763}
1764
1765/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1766///
1767/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_div_ps&expand=2160)
1768#[inline]
1769#[target_feature(enable = "avx512f,avx512vl")]
1770#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1771#[cfg_attr(test, assert_instr(vdivps))]
1772pub unsafe fn _mm256_mask_div_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
1773 let div: f32x8 = _mm256_div_ps(a, b).as_f32x8();
1774 transmute(simd_select_bitmask(k, div, src.as_f32x8()))
1775}
1776
1777/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1778///
1779/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_div_ps&expand=2161)
1780#[inline]
1781#[target_feature(enable = "avx512f,avx512vl")]
1782#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1783#[cfg_attr(test, assert_instr(vdivps))]
1784pub unsafe fn _mm256_maskz_div_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
1785 let div: f32x8 = _mm256_div_ps(a, b).as_f32x8();
1786 let zero: f32x8 = _mm256_setzero_ps().as_f32x8();
1787 transmute(simd_select_bitmask(k, div, zero))
1788}
1789
1790/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1791///
1792/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_div_ps&expand=2157)
1793#[inline]
1794#[target_feature(enable = "avx512f,avx512vl")]
1795#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1796#[cfg_attr(test, assert_instr(vdivps))]
1797pub unsafe fn _mm_mask_div_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
1798 let div: f32x4 = _mm_div_ps(a, b).as_f32x4();
1799 transmute(simd_select_bitmask(k, div, src.as_f32x4()))
1800}
1801
1802/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1803///
1804/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_div_ps&expand=2158)
1805#[inline]
1806#[target_feature(enable = "avx512f,avx512vl")]
1807#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1808#[cfg_attr(test, assert_instr(vdivps))]
1809pub unsafe fn _mm_maskz_div_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
1810 let div: f32x4 = _mm_div_ps(a, b).as_f32x4();
1811 let zero: f32x4 = _mm_setzero_ps().as_f32x4();
1812 transmute(simd_select_bitmask(k, div, zero))
1813}
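
// Illustrative sketch (assumes AVX-512F/VL): with the masked division forms, lanes whose
// mask bit is clear never receive the quotient, so a troublesome divisor in an inactive
// lane does not put an infinity or NaN into the destination. Lane values listed
// low-to-high.
//
//     let src = _mm_set1_ps(7.0);
//     let a = _mm_set_ps(8.0, 6.0, 4.0, 2.0); // lanes: [2.0, 4.0, 6.0, 8.0]
//     let b = _mm_set_ps(2.0, 0.0, 2.0, 2.0); // lane 2 would divide by zero
//     let q = _mm_mask_div_ps(src, 0b1011, a, b); // lanes: [1.0, 2.0, 7.0, 4.0]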
1814
1815/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst.
1816///
1817/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=512_div_pd&expand=2153)
1818#[inline]
1819#[target_feature(enable = "avx512f")]
1820#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1821#[cfg_attr(test, assert_instr(vdivpd))]
1822pub unsafe fn _mm512_div_pd(a: __m512d, b: __m512d) -> __m512d {
1823 transmute(simd_div(a.as_f64x8(), b.as_f64x8()))
1824}
1825
1826/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1827///
1828/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_pd&expand=2154)
1829#[inline]
1830#[target_feature(enable = "avx512f")]
1831#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1832#[cfg_attr(test, assert_instr(vdivpd))]
1833pub unsafe fn _mm512_mask_div_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
1834 let div: f64x8 = _mm512_div_pd(a, b).as_f64x8();
1835 transmute(simd_select_bitmask(k, div, src.as_f64x8()))
1836}
1837
1838/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1839///
1840/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_pd&expand=2155)
1841#[inline]
1842#[target_feature(enable = "avx512f")]
1843#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1844#[cfg_attr(test, assert_instr(vdivpd))]
1845pub unsafe fn _mm512_maskz_div_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
1846 let div: f64x8 = _mm512_div_pd(a, b).as_f64x8();
1847 let zero: f64x8 = _mm512_setzero_pd().as_f64x8();
1848 transmute(simd_select_bitmask(k, div, zero))
1849}
1850
1851/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1852///
1853/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_div_pd&expand=2151)
1854#[inline]
1855#[target_feature(enable = "avx512f,avx512vl")]
1856#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1857#[cfg_attr(test, assert_instr(vdivpd))]
1858pub unsafe fn _mm256_mask_div_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
1859 let div: f64x4 = _mm256_div_pd(a, b).as_f64x4();
1860 transmute(simd_select_bitmask(k, div, src.as_f64x4()))
1861}
1862
1863/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1864///
1865/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_div_pd&expand=2152)
1866#[inline]
1867#[target_feature(enable = "avx512f,avx512vl")]
1868#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1869#[cfg_attr(test, assert_instr(vdivpd))]
1870pub unsafe fn _mm256_maskz_div_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
1871 let div: f64x4 = _mm256_div_pd(a, b).as_f64x4();
1872 let zero: f64x4 = _mm256_setzero_pd().as_f64x4();
1873 transmute(simd_select_bitmask(k, div, zero))
1874}
1875
1876/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1877///
1878/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_div_pd&expand=2148)
1879#[inline]
1880#[target_feature(enable = "avx512f,avx512vl")]
1881#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1882#[cfg_attr(test, assert_instr(vdivpd))]
1883pub unsafe fn _mm_mask_div_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
1884 let div: f64x2 = _mm_div_pd(a, b).as_f64x2();
1885 transmute(simd_select_bitmask(k, div, src.as_f64x2()))
1886}
1887
1888/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1889///
1890/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_div_pd&expand=2149)
1891#[inline]
1892#[target_feature(enable = "avx512f,avx512vl")]
1893#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1894#[cfg_attr(test, assert_instr(vdivpd))]
1895pub unsafe fn _mm_maskz_div_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
1896 let div: f64x2 = _mm_div_pd(a, b).as_f64x2();
1897 let zero: f64x2 = _mm_setzero_pd().as_f64x2();
1898 transmute(simd_select_bitmask(k, div, zero))
1899}
1900
1901/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst.
1902///
1903/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epi32&expand=3582)
1904#[inline]
1905#[target_feature(enable = "avx512f")]
1906#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1907#[cfg_attr(test, assert_instr(vpmaxsd))]
1908pub unsafe fn _mm512_max_epi32(a: __m512i, b: __m512i) -> __m512i {
1909 transmute(vpmaxsd(a.as_i32x16(), b.as_i32x16()))
1910}
1911
1912/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1913///
1914/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epi32&expand=3580)
1915#[inline]
1916#[target_feature(enable = "avx512f")]
1917#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1918#[cfg_attr(test, assert_instr(vpmaxsd))]
1919pub unsafe fn _mm512_mask_max_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
1920 let max: i32x16 = _mm512_max_epi32(a, b).as_i32x16();
1921 transmute(simd_select_bitmask(k, max, src.as_i32x16()))
1922}
1923
1924/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1925///
1926/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epi32&expand=3581)
1927#[inline]
1928#[target_feature(enable = "avx512f")]
1929#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1930#[cfg_attr(test, assert_instr(vpmaxsd))]
1931pub unsafe fn _mm512_maskz_max_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
1932 let max: i32x16 = _mm512_max_epi32(a, b).as_i32x16();
1933 let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
1934 transmute(simd_select_bitmask(k, max, zero))
1935}
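
// NOTE (illustrative, not part of the original source): a sketch of the
// writemask/zeromask distinction for the 32-bit signed maximum. The constants
// and the helper name are hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_mask_max_epi32_usage() -> (__m512i, __m512i) {
    let a = _mm512_set1_epi32(-1);
    let b = _mm512_set1_epi32(2);
    let src = _mm512_set1_epi32(7);
    // Lower 8 lanes take max(-1, 2) = 2; upper 8 lanes copy 7 from `src`.
    let masked = _mm512_mask_max_epi32(src, 0x00FF, a, b);
    // Lower 8 lanes take 2; upper 8 lanes are zeroed out.
    let zeroed = _mm512_maskz_max_epi32(0x00FF, a, b);
    (masked, zeroed)
}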

/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epi32&expand=3577)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
pub unsafe fn _mm256_mask_max_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    let max: i32x8 = _mm256_max_epi32(a, b).as_i32x8();
    transmute(simd_select_bitmask(k, max, src.as_i32x8()))
}

/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epi32&expand=3578)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
pub unsafe fn _mm256_maskz_max_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    let max: i32x8 = _mm256_max_epi32(a, b).as_i32x8();
    let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
    transmute(simd_select_bitmask(k, max, zero))
}

/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epi32&expand=3574)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
pub unsafe fn _mm_mask_max_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let max: i32x4 = _mm_max_epi32(a, b).as_i32x4();
    transmute(simd_select_bitmask(k, max, src.as_i32x4()))
}

/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epi32&expand=3575)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
pub unsafe fn _mm_maskz_max_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let max: i32x4 = _mm_max_epi32(a, b).as_i32x4();
    let zero: i32x4 = _mm_setzero_si128().as_i32x4();
    transmute(simd_select_bitmask(k, max, zero))
}

/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epi64&expand=3591)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
pub unsafe fn _mm512_max_epi64(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpmaxsq(a.as_i64x8(), b.as_i64x8()))
}

/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epi64&expand=3589)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
pub unsafe fn _mm512_mask_max_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let max: i64x8 = _mm512_max_epi64(a, b).as_i64x8();
    transmute(simd_select_bitmask(k, max, src.as_i64x8()))
}

/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epi64&expand=3590)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
pub unsafe fn _mm512_maskz_max_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let max: i64x8 = _mm512_max_epi64(a, b).as_i64x8();
    let zero: i64x8 = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, max, zero))
}
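
// NOTE (illustrative, not part of the original source): unlike the 32-bit
// forms, packed 64-bit integer max has no SSE/AVX2 counterpart; `vpmaxsq` is
// new with AVX-512F (and AVX-512VL for the 128/256-bit widths). A hypothetical
// usage sketch:
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_max_epi64_usage() -> __m512i {
    let a = _mm512_set1_epi64(-5);
    let b = _mm512_set1_epi64(3);
    // Every lane becomes max(-5, 3) = 3.
    _mm512_max_epi64(a, b)
}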

/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epi64&expand=3588)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
pub unsafe fn _mm256_max_epi64(a: __m256i, b: __m256i) -> __m256i {
    transmute(vpmaxsq256(a.as_i64x4(), b.as_i64x4()))
}

/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epi64&expand=3586)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
pub unsafe fn _mm256_mask_max_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    let max: i64x4 = _mm256_max_epi64(a, b).as_i64x4();
    transmute(simd_select_bitmask(k, max, src.as_i64x4()))
}

/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epi64&expand=3587)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
pub unsafe fn _mm256_maskz_max_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    let max: i64x4 = _mm256_max_epi64(a, b).as_i64x4();
    let zero: i64x4 = _mm256_setzero_si256().as_i64x4();
    transmute(simd_select_bitmask(k, max, zero))
}

/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epi64&expand=3585)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
pub unsafe fn _mm_max_epi64(a: __m128i, b: __m128i) -> __m128i {
    transmute(vpmaxsq128(a.as_i64x2(), b.as_i64x2()))
}

/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epi64&expand=3583)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
pub unsafe fn _mm_mask_max_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let max: i64x2 = _mm_max_epi64(a, b).as_i64x2();
    transmute(simd_select_bitmask(k, max, src.as_i64x2()))
}

/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epi64&expand=3584)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
pub unsafe fn _mm_maskz_max_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let max: i64x2 = _mm_max_epi64(a, b).as_i64x2();
    let zero: i64x2 = _mm_setzero_si128().as_i64x2();
    transmute(simd_select_bitmask(k, max, zero))
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_ps&expand=3655)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmaxps))]
pub unsafe fn _mm512_max_ps(a: __m512, b: __m512) -> __m512 {
    transmute(vmaxps(
        a.as_f32x16(),
        b.as_f32x16(),
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_ps&expand=3653)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmaxps))]
pub unsafe fn _mm512_mask_max_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    let max: f32x16 = _mm512_max_ps(a, b).as_f32x16();
    transmute(simd_select_bitmask(k, max, src.as_f32x16()))
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_ps&expand=3654)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmaxps))]
pub unsafe fn _mm512_maskz_max_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    let max: f32x16 = _mm512_max_ps(a, b).as_f32x16();
    let zero: f32x16 = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, max, zero))
}
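
// NOTE (illustrative, not part of the original source): `vmaxps` follows the
// hardware max semantics (operand order matters when a lane holds NaN), so
// this is not a fully symmetric IEEE maximum. A hypothetical usage sketch:
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_max_ps_usage() -> __m512 {
    let a = _mm512_set1_ps(1.5);
    let b = _mm512_set1_ps(-0.5);
    // Every lane becomes max(1.5, -0.5) = 1.5.
    _mm512_max_ps(a, b)
}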

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_ps&expand=3650)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmaxps))]
pub unsafe fn _mm256_mask_max_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
    let max: f32x8 = _mm256_max_ps(a, b).as_f32x8();
    transmute(simd_select_bitmask(k, max, src.as_f32x8()))
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_ps&expand=3651)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmaxps))]
pub unsafe fn _mm256_maskz_max_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    let max: f32x8 = _mm256_max_ps(a, b).as_f32x8();
    let zero: f32x8 = _mm256_setzero_ps().as_f32x8();
    transmute(simd_select_bitmask(k, max, zero))
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_ps&expand=3647)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmaxps))]
pub unsafe fn _mm_mask_max_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    let max: f32x4 = _mm_max_ps(a, b).as_f32x4();
    transmute(simd_select_bitmask(k, max, src.as_f32x4()))
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_ps&expand=3648)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmaxps))]
pub unsafe fn _mm_maskz_max_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    let max: f32x4 = _mm_max_ps(a, b).as_f32x4();
    let zero: f32x4 = _mm_setzero_ps().as_f32x4();
    transmute(simd_select_bitmask(k, max, zero))
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_pd&expand=3645)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmaxpd))]
pub unsafe fn _mm512_max_pd(a: __m512d, b: __m512d) -> __m512d {
    transmute(vmaxpd(a.as_f64x8(), b.as_f64x8(), _MM_FROUND_CUR_DIRECTION))
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_pd&expand=3643)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmaxpd))]
pub unsafe fn _mm512_mask_max_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    let max: f64x8 = _mm512_max_pd(a, b).as_f64x8();
    transmute(simd_select_bitmask(k, max, src.as_f64x8()))
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_pd&expand=3644)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmaxpd))]
pub unsafe fn _mm512_maskz_max_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    let max: f64x8 = _mm512_max_pd(a, b).as_f64x8();
    let zero: f64x8 = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, max, zero))
}
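
// NOTE (illustrative, not part of the original source): the double-precision
// forms mirror the single-precision ones lane for lane. A hypothetical sketch
// of the zero-masked variant:
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_maskz_max_pd_usage() -> __m512d {
    let a = _mm512_set1_pd(2.0);
    let b = _mm512_set1_pd(4.0);
    // Odd lanes (mask 0b1010_1010) take max(2.0, 4.0) = 4.0; even lanes are zeroed.
    _mm512_maskz_max_pd(0b1010_1010, a, b)
}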

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_pd&expand=3640)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmaxpd))]
pub unsafe fn _mm256_mask_max_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    let max: f64x4 = _mm256_max_pd(a, b).as_f64x4();
    transmute(simd_select_bitmask(k, max, src.as_f64x4()))
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_pd&expand=3641)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmaxpd))]
pub unsafe fn _mm256_maskz_max_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    let max: f64x4 = _mm256_max_pd(a, b).as_f64x4();
    let zero: f64x4 = _mm256_setzero_pd().as_f64x4();
    transmute(simd_select_bitmask(k, max, zero))
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_pd&expand=3637)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmaxpd))]
pub unsafe fn _mm_mask_max_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    let max: f64x2 = _mm_max_pd(a, b).as_f64x2();
    transmute(simd_select_bitmask(k, max, src.as_f64x2()))
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_pd&expand=3638)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmaxpd))]
pub unsafe fn _mm_maskz_max_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    let max: f64x2 = _mm_max_pd(a, b).as_f64x2();
    let zero: f64x2 = _mm_setzero_pd().as_f64x2();
    transmute(simd_select_bitmask(k, max, zero))
}

/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epu32&expand=3618)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxud))]
pub unsafe fn _mm512_max_epu32(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpmaxud(a.as_u32x16(), b.as_u32x16()))
}

/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epu32&expand=3616)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxud))]
pub unsafe fn _mm512_mask_max_epu32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    let max: u32x16 = _mm512_max_epu32(a, b).as_u32x16();
    transmute(simd_select_bitmask(k, max, src.as_u32x16()))
}

/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epu32&expand=3617)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxud))]
pub unsafe fn _mm512_maskz_max_epu32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    let max: u32x16 = _mm512_max_epu32(a, b).as_u32x16();
    let zero: u32x16 = _mm512_setzero_si512().as_u32x16();
    transmute(simd_select_bitmask(k, max, zero))
}
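
// NOTE (illustrative, not part of the original source): the `epu32` forms
// compare lanes as unsigned values, so -1 (all bits set) is the largest
// possible lane. A hypothetical sketch of the difference from `epi32`:
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_max_epu32_usage() -> (__m512i, __m512i) {
    let a = _mm512_set1_epi32(-1); // 0xFFFF_FFFF when read as unsigned
    let b = _mm512_set1_epi32(1);
    let signed = _mm512_max_epi32(a, b); // every lane is 1
    let unsigned = _mm512_max_epu32(a, b); // every lane is 0xFFFF_FFFF
    (signed, unsigned)
}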

/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epu32&expand=3613)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxud))]
pub unsafe fn _mm256_mask_max_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    let max: u32x8 = _mm256_max_epu32(a, b).as_u32x8();
    transmute(simd_select_bitmask(k, max, src.as_u32x8()))
}

/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epu32&expand=3614)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxud))]
pub unsafe fn _mm256_maskz_max_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    let max: u32x8 = _mm256_max_epu32(a, b).as_u32x8();
    let zero: u32x8 = _mm256_setzero_si256().as_u32x8();
    transmute(simd_select_bitmask(k, max, zero))
}

/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epu32&expand=3610)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxud))]
pub unsafe fn _mm_mask_max_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let max: u32x4 = _mm_max_epu32(a, b).as_u32x4();
    transmute(simd_select_bitmask(k, max, src.as_u32x4()))
}

/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epu32&expand=3611)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxud))]
pub unsafe fn _mm_maskz_max_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let max: u32x4 = _mm_max_epu32(a, b).as_u32x4();
    let zero: u32x4 = _mm_setzero_si128().as_u32x4();
    transmute(simd_select_bitmask(k, max, zero))
}

/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epu64&expand=3627)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxuq))]
pub unsafe fn _mm512_max_epu64(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpmaxuq(a.as_u64x8(), b.as_u64x8()))
}

/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epu64&expand=3625)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxuq))]
pub unsafe fn _mm512_mask_max_epu64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let max: u64x8 = _mm512_max_epu64(a, b).as_u64x8();
    transmute(simd_select_bitmask(k, max, src.as_u64x8()))
}

/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epu64&expand=3626)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxuq))]
pub unsafe fn _mm512_maskz_max_epu64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let max: u64x8 = _mm512_max_epu64(a, b).as_u64x8();
    let zero: u64x8 = _mm512_setzero_si512().as_u64x8();
    transmute(simd_select_bitmask(k, max, zero))
}

/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epu64&expand=3624)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxuq))]
pub unsafe fn _mm256_max_epu64(a: __m256i, b: __m256i) -> __m256i {
    transmute(vpmaxuq256(a.as_u64x4(), b.as_u64x4()))
}

/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epu64&expand=3622)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxuq))]
pub unsafe fn _mm256_mask_max_epu64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    let max: u64x4 = _mm256_max_epu64(a, b).as_u64x4();
    transmute(simd_select_bitmask(k, max, src.as_u64x4()))
}

/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epu64&expand=3623)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxuq))]
pub unsafe fn _mm256_maskz_max_epu64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    let max: u64x4 = _mm256_max_epu64(a, b).as_u64x4();
    let zero: u64x4 = _mm256_setzero_si256().as_u64x4();
    transmute(simd_select_bitmask(k, max, zero))
}

/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epu64&expand=3621)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxuq))]
pub unsafe fn _mm_max_epu64(a: __m128i, b: __m128i) -> __m128i {
    transmute(vpmaxuq128(a.as_u64x2(), b.as_u64x2()))
}

/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epu64&expand=3619)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxuq))]
pub unsafe fn _mm_mask_max_epu64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let max: u64x2 = _mm_max_epu64(a, b).as_u64x2();
    transmute(simd_select_bitmask(k, max, src.as_u64x2()))
}

/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epu64&expand=3620)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxuq))]
pub unsafe fn _mm_maskz_max_epu64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let max: u64x2 = _mm_max_epu64(a, b).as_u64x2();
    let zero: u64x2 = _mm_setzero_si128().as_u64x2();
    transmute(simd_select_bitmask(k, max, zero))
}

/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epi32&expand=3696)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminsd))]
pub unsafe fn _mm512_min_epi32(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpminsd(a.as_i32x16(), b.as_i32x16()))
}

/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epi32&expand=3694)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminsd))]
pub unsafe fn _mm512_mask_min_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    let min: i32x16 = _mm512_min_epi32(a, b).as_i32x16();
    transmute(simd_select_bitmask(k, min, src.as_i32x16()))
}

/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epi32&expand=3695)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminsd))]
pub unsafe fn _mm512_maskz_min_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    let min: i32x16 = _mm512_min_epi32(a, b).as_i32x16();
    let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, min, zero))
}
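
// NOTE (illustrative, not part of the original source): the `min` family is
// the mirror image of `max` above, and the mask handling is identical. A
// hypothetical sketch:
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_mask_min_epi32_usage() -> __m512i {
    let src = _mm512_set1_epi32(0);
    let a = _mm512_set1_epi32(-3);
    let b = _mm512_set1_epi32(5);
    // The low 4 lanes take min(-3, 5) = -3; the remaining lanes keep 0 from `src`.
    _mm512_mask_min_epi32(src, 0x000F, a, b)
}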

/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epi32&expand=3691)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminsd))]
pub unsafe fn _mm256_mask_min_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    let min: i32x8 = _mm256_min_epi32(a, b).as_i32x8();
    transmute(simd_select_bitmask(k, min, src.as_i32x8()))
}

/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epi32&expand=3692)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminsd))]
pub unsafe fn _mm256_maskz_min_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    let min: i32x8 = _mm256_min_epi32(a, b).as_i32x8();
    let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
    transmute(simd_select_bitmask(k, min, zero))
}

/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epi32&expand=3688)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminsd))]
pub unsafe fn _mm_mask_min_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let min: i32x4 = _mm_min_epi32(a, b).as_i32x4();
    transmute(simd_select_bitmask(k, min, src.as_i32x4()))
}

/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epi32&expand=3689)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminsd))]
pub unsafe fn _mm_maskz_min_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let min: i32x4 = _mm_min_epi32(a, b).as_i32x4();
    let zero: i32x4 = _mm_setzero_si128().as_i32x4();
    transmute(simd_select_bitmask(k, min, zero))
}

/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epi64&expand=3705)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminsq))]
pub unsafe fn _mm512_min_epi64(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpminsq(a.as_i64x8(), b.as_i64x8()))
}

/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epi64&expand=3703)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminsq))]
pub unsafe fn _mm512_mask_min_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let min: i64x8 = _mm512_min_epi64(a, b).as_i64x8();
    transmute(simd_select_bitmask(k, min, src.as_i64x8()))
}

/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epi64&expand=3704)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminsq))]
pub unsafe fn _mm512_maskz_min_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let min: i64x8 = _mm512_min_epi64(a, b).as_i64x8();
    let zero: i64x8 = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, min, zero))
}
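
// NOTE (illustrative, not part of the original source): as with `vpmaxsq`,
// packed 64-bit signed minimum (`vpminsq`) only exists under AVX-512F/VL. A
// hypothetical usage sketch:
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_min_epi64_usage() -> __m512i {
    let a = _mm512_set1_epi64(-5);
    let b = _mm512_set1_epi64(3);
    // Every lane becomes min(-5, 3) = -5.
    _mm512_min_epi64(a, b)
}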

/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epi64&expand=3702)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminsq))]
pub unsafe fn _mm256_min_epi64(a: __m256i, b: __m256i) -> __m256i {
    transmute(vpminsq256(a.as_i64x4(), b.as_i64x4()))
}

/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epi64&expand=3700)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminsq))]
pub unsafe fn _mm256_mask_min_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    let min: i64x4 = _mm256_min_epi64(a, b).as_i64x4();
    transmute(simd_select_bitmask(k, min, src.as_i64x4()))
}

/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epi64&expand=3701)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminsq))]
pub unsafe fn _mm256_maskz_min_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    let min: i64x4 = _mm256_min_epi64(a, b).as_i64x4();
    let zero: i64x4 = _mm256_setzero_si256().as_i64x4();
    transmute(simd_select_bitmask(k, min, zero))
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_ps&expand=3769)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vminps))]
pub unsafe fn _mm512_min_ps(a: __m512, b: __m512) -> __m512 {
    transmute(vminps(
        a.as_f32x16(),
        b.as_f32x16(),
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_ps&expand=3767)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vminps))]
pub unsafe fn _mm512_mask_min_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    let min: f32x16 = _mm512_min_ps(a, b).as_f32x16();
    transmute(simd_select_bitmask(k, min, src.as_f32x16()))
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_ps&expand=3768)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vminps))]
pub unsafe fn _mm512_maskz_min_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    let min: f32x16 = _mm512_min_ps(a, b).as_f32x16();
    let zero: f32x16 = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, min, zero))
}
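
// NOTE (illustrative, not part of the original source): like `vmaxps`, `vminps`
// uses the hardware min semantics rather than a fully symmetric IEEE minimum.
// A hypothetical sketch of the zero-masked variant:
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_maskz_min_ps_usage() -> __m512 {
    let a = _mm512_set1_ps(1.5);
    let b = _mm512_set1_ps(-0.5);
    // The low 8 lanes take min(1.5, -0.5) = -0.5; the high 8 lanes are zeroed.
    _mm512_maskz_min_ps(0x00FF, a, b)
}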

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_ps&expand=3764)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vminps))]
pub unsafe fn _mm256_mask_min_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
    let min: f32x8 = _mm256_min_ps(a, b).as_f32x8();
    transmute(simd_select_bitmask(k, min, src.as_f32x8()))
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_ps&expand=3765)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vminps))]
pub unsafe fn _mm256_maskz_min_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    let min: f32x8 = _mm256_min_ps(a, b).as_f32x8();
    let zero: f32x8 = _mm256_setzero_ps().as_f32x8();
    transmute(simd_select_bitmask(k, min, zero))
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_ps&expand=3761)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vminps))]
pub unsafe fn _mm_mask_min_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    let min: f32x4 = _mm_min_ps(a, b).as_f32x4();
    transmute(simd_select_bitmask(k, min, src.as_f32x4()))
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_ps&expand=3762)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vminps))]
pub unsafe fn _mm_maskz_min_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    let min: f32x4 = _mm_min_ps(a, b).as_f32x4();
    let zero: f32x4 = _mm_setzero_ps().as_f32x4();
    transmute(simd_select_bitmask(k, min, zero))
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_pd&expand=3759)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vminpd))]
pub unsafe fn _mm512_min_pd(a: __m512d, b: __m512d) -> __m512d {
    transmute(vminpd(a.as_f64x8(), b.as_f64x8(), _MM_FROUND_CUR_DIRECTION))
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_pd&expand=3757)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vminpd))]
pub unsafe fn _mm512_mask_min_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    let min: f64x8 = _mm512_min_pd(a, b).as_f64x8();
    transmute(simd_select_bitmask(k, min, src.as_f64x8()))
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_pd&expand=3758)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vminpd))]
pub unsafe fn _mm512_maskz_min_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    let min: f64x8 = _mm512_min_pd(a, b).as_f64x8();
    let zero: f64x8 = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, min, zero))
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_pd&expand=3754)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vminpd))]
pub unsafe fn _mm256_mask_min_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    let min: f64x4 = _mm256_min_pd(a, b).as_f64x4();
    transmute(simd_select_bitmask(k, min, src.as_f64x4()))
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_pd&expand=3755)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vminpd))]
pub unsafe fn _mm256_maskz_min_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    let min: f64x4 = _mm256_min_pd(a, b).as_f64x4();
    let zero: f64x4 = _mm256_setzero_pd().as_f64x4();
    transmute(simd_select_bitmask(k, min, zero))
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_pd&expand=3751)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vminpd))]
pub unsafe fn _mm_mask_min_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    let min: f64x2 = _mm_min_pd(a, b).as_f64x2();
    transmute(simd_select_bitmask(k, min, src.as_f64x2()))
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_pd&expand=3752)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vminpd))]
pub unsafe fn _mm_maskz_min_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    let min: f64x2 = _mm_min_pd(a, b).as_f64x2();
    let zero: f64x2 = _mm_setzero_pd().as_f64x2();
    transmute(simd_select_bitmask(k, min, zero))
}

/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epu32&expand=3732)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminud))]
pub unsafe fn _mm512_min_epu32(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpminud(a.as_u32x16(), b.as_u32x16()))
}

/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epu32&expand=3730)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminud))]
pub unsafe fn _mm512_mask_min_epu32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    let min: u32x16 = _mm512_min_epu32(a, b).as_u32x16();
    transmute(simd_select_bitmask(k, min, src.as_u32x16()))
}

/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epu32&expand=3731)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminud))]
pub unsafe fn _mm512_maskz_min_epu32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    let min: u32x16 = _mm512_min_epu32(a, b).as_u32x16();
    let zero: u32x16 = _mm512_setzero_si512().as_u32x16();
    transmute(simd_select_bitmask(k, min, zero))
}

/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epu32&expand=3727)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminud))]
pub unsafe fn _mm256_mask_min_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    let min: u32x8 = _mm256_min_epu32(a, b).as_u32x8();
    transmute(simd_select_bitmask(k, min, src.as_u32x8()))
}

/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epu32&expand=3728)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminud))]
pub unsafe fn _mm256_maskz_min_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    let min: u32x8 = _mm256_min_epu32(a, b).as_u32x8();
    let zero: u32x8 = _mm256_setzero_si256().as_u32x8();
    transmute(simd_select_bitmask(k, min, zero))
}

/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epu32&expand=3724)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminud))]
pub unsafe fn _mm_mask_min_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let min: u32x4 = _mm_min_epu32(a, b).as_u32x4();
    transmute(simd_select_bitmask(k, min, src.as_u32x4()))
}

/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epu32&expand=3725)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminud))]
pub unsafe fn _mm_maskz_min_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let min: u32x4 = _mm_min_epu32(a, b).as_u32x4();
    let zero: u32x4 = _mm_setzero_si128().as_u32x4();
    transmute(simd_select_bitmask(k, min, zero))
}
2886
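// A minimal usage sketch of the masked minimum forms above (illustrative addition, not
// part of the upstream implementation). It assumes an AVX-512F capable CPU at runtime
// and is gated behind `cfg(test)` so it does not affect normal builds.
#[cfg(test)]
#[allow(dead_code)]
mod min_epu32_masking_sketch {
    use super::*;

    #[target_feature(enable = "avx512f")]
    unsafe fn demo() {
        let a = _mm512_set1_epi32(3);
        let b = _mm512_set1_epi32(7);
        // Low 8 lanes hold min(3, 7) = 3; high 8 lanes are copied from `src` (here `a`).
        let _writemasked = _mm512_mask_min_epu32(a, 0x00ff, a, b);
        // Low 8 lanes hold min(3, 7) = 3; high 8 lanes are zeroed.
        let _zeromasked = _mm512_maskz_min_epu32(0x00ff, a, b);
    }
}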
2887/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst.
2888///
2889/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epu64&expand=3741)
2890#[inline]
2891#[target_feature(enable = "avx512f")]
2892#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2893#[cfg_attr(test, assert_instr(vpminuq))]
pub unsafe fn _mm512_min_epu64(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpminuq(a.as_u64x8(), b.as_u64x8()))
}
2897
2898/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2899///
2900/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epu64&expand=3739)
2901#[inline]
2902#[target_feature(enable = "avx512f")]
2903#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2904#[cfg_attr(test, assert_instr(vpminuq))]
pub unsafe fn _mm512_mask_min_epu64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let min: u64x8 = _mm512_min_epu64(a, b).as_u64x8();
    transmute(simd_select_bitmask(k, min, src.as_u64x8()))
}
2909
2910/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2911///
2912/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epu64&expand=3740)
2913#[inline]
2914#[target_feature(enable = "avx512f")]
2915#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2916#[cfg_attr(test, assert_instr(vpminuq))]
pub unsafe fn _mm512_maskz_min_epu64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let min: u64x8 = _mm512_min_epu64(a, b).as_u64x8();
    let zero: u64x8 = _mm512_setzero_si512().as_u64x8();
    transmute(simd_select_bitmask(k, min, zero))
}
2922
2923/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst.
2924///
2925/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epu64&expand=3738)
2926#[inline]
2927#[target_feature(enable = "avx512f,avx512vl")]
2928#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2929#[cfg_attr(test, assert_instr(vpminuq))]
pub unsafe fn _mm256_min_epu64(a: __m256i, b: __m256i) -> __m256i {
    transmute(vpminuq256(a.as_u64x4(), b.as_u64x4()))
}
2933
2934/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2935///
2936/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epu64&expand=3736)
2937#[inline]
2938#[target_feature(enable = "avx512f,avx512vl")]
2939#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2940#[cfg_attr(test, assert_instr(vpminuq))]
pub unsafe fn _mm256_mask_min_epu64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    let min: u64x4 = _mm256_min_epu64(a, b).as_u64x4();
    transmute(simd_select_bitmask(k, min, src.as_u64x4()))
}
2945
2946/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2947///
2948/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epu64&expand=3737)
2949#[inline]
2950#[target_feature(enable = "avx512f,avx512vl")]
2951#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2952#[cfg_attr(test, assert_instr(vpminuq))]
pub unsafe fn _mm256_maskz_min_epu64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    let min: u64x4 = _mm256_min_epu64(a, b).as_u64x4();
    let zero: u64x4 = _mm256_setzero_si256().as_u64x4();
    transmute(simd_select_bitmask(k, min, zero))
}
2958
2959/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst.
2960///
2961/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epu64&expand=3735)
2962#[inline]
2963#[target_feature(enable = "avx512f,avx512vl")]
2964#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2965#[cfg_attr(test, assert_instr(vpminuq))]
pub unsafe fn _mm_min_epu64(a: __m128i, b: __m128i) -> __m128i {
    transmute(vpminuq128(a.as_u64x2(), b.as_u64x2()))
}
2969
2970/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2971///
2972/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epu64&expand=3733)
2973#[inline]
2974#[target_feature(enable = "avx512f,avx512vl")]
2975#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2976#[cfg_attr(test, assert_instr(vpminuq))]
pub unsafe fn _mm_mask_min_epu64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let min: u64x2 = _mm_min_epu64(a, b).as_u64x2();
    transmute(simd_select_bitmask(k, min, src.as_u64x2()))
}
2981
2982/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2983///
2984/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epu64&expand=3734)
2985#[inline]
2986#[target_feature(enable = "avx512f,avx512vl")]
2987#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2988#[cfg_attr(test, assert_instr(vpminuq))]
pub unsafe fn _mm_maskz_min_epu64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let min: u64x2 = _mm_min_epu64(a, b).as_u64x2();
    let zero: u64x2 = _mm_setzero_si128().as_u64x2();
    transmute(simd_select_bitmask(k, min, zero))
}
2994
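// Illustrative sketch (addition, not upstream code): unlike the 32-bit case, there is no
// packed unsigned 64-bit minimum below AVX-512, so even the 128-bit and 256-bit forms
// above require avx512f + avx512vl. Gated behind `cfg(test)`; assumes a capable CPU.
#[cfg(test)]
#[allow(dead_code)]
mod min_epu64_sketch {
    use super::*;

    #[target_feature(enable = "avx512f,avx512vl")]
    unsafe fn demo() {
        let a = _mm_set_epi64x(u64::MAX as i64, 1);
        let b = _mm_set_epi64x(0, 2);
        // Unsigned lane-wise minimum: the high lane becomes 0, the low lane becomes 1.
        let _min = _mm_min_epu64(a, b);
    }
}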
2995/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst.
2996///
2997/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_ps&expand=5371)
2998#[inline]
2999#[target_feature(enable = "avx512f")]
3000#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3001#[cfg_attr(test, assert_instr(vsqrtps))]
pub unsafe fn _mm512_sqrt_ps(a: __m512) -> __m512 {
    transmute(vsqrtps(a.as_f32x16(), _MM_FROUND_CUR_DIRECTION))
}
3005
3006/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3007///
3008/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_ps&expand=5369)
3009#[inline]
3010#[target_feature(enable = "avx512f")]
3011#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3012#[cfg_attr(test, assert_instr(vsqrtps))]
pub unsafe fn _mm512_mask_sqrt_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    let sqrt: f32x16 = _mm512_sqrt_ps(a).as_f32x16();
    transmute(simd_select_bitmask(k, sqrt, src.as_f32x16()))
}
3017
3018/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3019///
3020/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_ps&expand=5370)
3021#[inline]
3022#[target_feature(enable = "avx512f")]
3023#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3024#[cfg_attr(test, assert_instr(vsqrtps))]
pub unsafe fn _mm512_maskz_sqrt_ps(k: __mmask16, a: __m512) -> __m512 {
    let sqrt: f32x16 = _mm512_sqrt_ps(a).as_f32x16();
    let zero: f32x16 = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, sqrt, zero))
}
3030
3031/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3032///
3033/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sqrt_ps&expand=5366)
3034#[inline]
3035#[target_feature(enable = "avx512f,avx512vl")]
3036#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3037#[cfg_attr(test, assert_instr(vsqrtps))]
pub unsafe fn _mm256_mask_sqrt_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
    let sqrt: f32x8 = _mm256_sqrt_ps(a).as_f32x8();
    transmute(simd_select_bitmask(k, sqrt, src.as_f32x8()))
}
3042
3043/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3044///
3045/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sqrt_ps&expand=5367)
3046#[inline]
3047#[target_feature(enable = "avx512f,avx512vl")]
3048#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3049#[cfg_attr(test, assert_instr(vsqrtps))]
pub unsafe fn _mm256_maskz_sqrt_ps(k: __mmask8, a: __m256) -> __m256 {
    let sqrt: f32x8 = _mm256_sqrt_ps(a).as_f32x8();
    let zero: f32x8 = _mm256_setzero_ps().as_f32x8();
    transmute(simd_select_bitmask(k, sqrt, zero))
}
3055
3056/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3057///
3058/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sqrt_ps&expand=5363)
3059#[inline]
3060#[target_feature(enable = "avx512f,avx512vl")]
3061#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3062#[cfg_attr(test, assert_instr(vsqrtps))]
pub unsafe fn _mm_mask_sqrt_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    let sqrt: f32x4 = _mm_sqrt_ps(a).as_f32x4();
    transmute(simd_select_bitmask(k, sqrt, src.as_f32x4()))
}
3067
3068/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3069///
3070/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sqrt_ps&expand=5364)
3071#[inline]
3072#[target_feature(enable = "avx512f,avx512vl")]
3073#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3074#[cfg_attr(test, assert_instr(vsqrtps))]
pub unsafe fn _mm_maskz_sqrt_ps(k: __mmask8, a: __m128) -> __m128 {
    let sqrt: f32x4 = _mm_sqrt_ps(a).as_f32x4();
    let zero: f32x4 = _mm_setzero_ps().as_f32x4();
    transmute(simd_select_bitmask(k, sqrt, zero))
}
3080
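// A minimal sketch of the masked square-root forms above (illustrative addition, not
// upstream code): lanes whose mask bit is clear never receive a computed result.
// Assumes an AVX-512F capable CPU; gated behind `cfg(test)`.
#[cfg(test)]
#[allow(dead_code)]
mod masked_sqrt_ps_sketch {
    use super::*;

    #[target_feature(enable = "avx512f")]
    unsafe fn demo() {
        let a = _mm512_set1_ps(4.0);
        // Even-indexed lanes hold sqrt(4.0) = 2.0; odd lanes keep the value from `src` (here `a`).
        let _writemasked = _mm512_mask_sqrt_ps(a, 0b0101_0101_0101_0101, a);
        // Even-indexed lanes hold 2.0; odd lanes are zeroed.
        let _zeromasked = _mm512_maskz_sqrt_ps(0b0101_0101_0101_0101, a);
    }
}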
3081/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst.
3082///
3083/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_pd&expand=5362)
3084#[inline]
3085#[target_feature(enable = "avx512f")]
3086#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3087#[cfg_attr(test, assert_instr(vsqrtpd))]
pub unsafe fn _mm512_sqrt_pd(a: __m512d) -> __m512d {
    transmute(vsqrtpd(a.as_f64x8(), _MM_FROUND_CUR_DIRECTION))
}
3091
3092/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3093///
3094/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_pd&expand=5360)
3095#[inline]
3096#[target_feature(enable = "avx512f")]
3097#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3098#[cfg_attr(test, assert_instr(vsqrtpd))]
pub unsafe fn _mm512_mask_sqrt_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    let sqrt: f64x8 = _mm512_sqrt_pd(a).as_f64x8();
    transmute(simd_select_bitmask(k, sqrt, src.as_f64x8()))
}
3103
3104/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3105///
3106/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_pd&expand=5361)
3107#[inline]
3108#[target_feature(enable = "avx512f")]
3109#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3110#[cfg_attr(test, assert_instr(vsqrtpd))]
pub unsafe fn _mm512_maskz_sqrt_pd(k: __mmask8, a: __m512d) -> __m512d {
    let sqrt: f64x8 = _mm512_sqrt_pd(a).as_f64x8();
    let zero: f64x8 = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, sqrt, zero))
}
3116
3117/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3118///
3119/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sqrt_pd&expand=5357)
3120#[inline]
3121#[target_feature(enable = "avx512f,avx512vl")]
3122#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3123#[cfg_attr(test, assert_instr(vsqrtpd))]
pub unsafe fn _mm256_mask_sqrt_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
    let sqrt: f64x4 = _mm256_sqrt_pd(a).as_f64x4();
    transmute(simd_select_bitmask(k, sqrt, src.as_f64x4()))
}
3128
3129/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3130///
3131/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sqrt_pd&expand=5358)
3132#[inline]
3133#[target_feature(enable = "avx512f,avx512vl")]
3134#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3135#[cfg_attr(test, assert_instr(vsqrtpd))]
pub unsafe fn _mm256_maskz_sqrt_pd(k: __mmask8, a: __m256d) -> __m256d {
    let sqrt: f64x4 = _mm256_sqrt_pd(a).as_f64x4();
    let zero: f64x4 = _mm256_setzero_pd().as_f64x4();
    transmute(simd_select_bitmask(k, sqrt, zero))
}
3141
3142/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3143///
3144/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sqrt_pd&expand=5354)
3145#[inline]
3146#[target_feature(enable = "avx512f,avx512vl")]
3147#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3148#[cfg_attr(test, assert_instr(vsqrtpd))]
pub unsafe fn _mm_mask_sqrt_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
    let sqrt: f64x2 = _mm_sqrt_pd(a).as_f64x2();
    transmute(simd_select_bitmask(k, sqrt, src.as_f64x2()))
}
3153
3154/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3155///
3156/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sqrt_pd&expand=5355)
3157#[inline]
3158#[target_feature(enable = "avx512f,avx512vl")]
3159#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3160#[cfg_attr(test, assert_instr(vsqrtpd))]
pub unsafe fn _mm_maskz_sqrt_pd(k: __mmask8, a: __m128d) -> __m128d {
    let sqrt: f64x2 = _mm_sqrt_pd(a).as_f64x2();
    let zero: f64x2 = _mm_setzero_pd().as_f64x2();
    transmute(simd_select_bitmask(k, sqrt, zero))
}
3166
3167/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.
3168///
3169/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_ps&expand=2557)
3170#[inline]
3171#[target_feature(enable = "avx512f")]
3172#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3173#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
pub unsafe fn _mm512_fmadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
    transmute(vfmadd132ps(a.as_f32x16(), b.as_f32x16(), c.as_f32x16()))
}
3177
3178/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3179///
3180/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_ps&expand=2558)
3181#[inline]
3182#[target_feature(enable = "avx512f")]
3183#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3184#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
pub unsafe fn _mm512_mask_fmadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
    let fmadd: f32x16 = _mm512_fmadd_ps(a, b, c).as_f32x16();
    transmute(simd_select_bitmask(k, fmadd, a.as_f32x16()))
}
3189
3190/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3191///
3192/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_ps&expand=2560)
3193#[inline]
3194#[target_feature(enable = "avx512f")]
3195#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3196#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
pub unsafe fn _mm512_maskz_fmadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
    let fmadd: f32x16 = _mm512_fmadd_ps(a, b, c).as_f32x16();
    let zero: f32x16 = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, fmadd, zero))
}
3202
3203/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3204///
3205/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_ps&expand=2559)
3206#[inline]
3207#[target_feature(enable = "avx512f")]
3208#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3209#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
pub unsafe fn _mm512_mask3_fmadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
    let fmadd: f32x16 = _mm512_fmadd_ps(a, b, c).as_f32x16();
    transmute(simd_select_bitmask(k, fmadd, c.as_f32x16()))
}
3214
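// A minimal sketch of the three masked FMA forms above (illustrative addition, not
// upstream code): they differ only in where unselected lanes come from — `mask_` copies
// from `a`, `maskz_` zeroes, and `mask3_` copies from `c`. Assumes an AVX-512F CPU.
#[cfg(test)]
#[allow(dead_code)]
mod fmadd_ps_mask_forms_sketch {
    use super::*;

    #[target_feature(enable = "avx512f")]
    unsafe fn demo() {
        let a = _mm512_set1_ps(2.0);
        let b = _mm512_set1_ps(3.0);
        let c = _mm512_set1_ps(1.0);
        let k: __mmask16 = 0x00ff;
        // Low 8 lanes: 2.0 * 3.0 + 1.0 = 7.0. High 8 lanes: copied from `a` (2.0).
        let _mask = _mm512_mask_fmadd_ps(a, k, b, c);
        // High 8 lanes: zeroed.
        let _maskz = _mm512_maskz_fmadd_ps(k, a, b, c);
        // High 8 lanes: copied from `c` (1.0).
        let _mask3 = _mm512_mask3_fmadd_ps(a, b, c, k);
    }
}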
3215/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3216///
3217/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmadd_ps&expand=2554)
3218#[inline]
3219#[target_feature(enable = "avx512f,avx512vl")]
3220#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3221#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
pub unsafe fn _mm256_mask_fmadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
    let fmadd: f32x8 = _mm256_fmadd_ps(a, b, c).as_f32x8();
    transmute(simd_select_bitmask(k, fmadd, a.as_f32x8()))
}
3226
3227/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3228///
3229/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmadd_ps&expand=2556)
3230#[inline]
3231#[target_feature(enable = "avx512f,avx512vl")]
3232#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3233#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
pub unsafe fn _mm256_maskz_fmadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
    let fmadd: f32x8 = _mm256_fmadd_ps(a, b, c).as_f32x8();
    let zero: f32x8 = _mm256_setzero_ps().as_f32x8();
    transmute(simd_select_bitmask(k, fmadd, zero))
}
3239
3240/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3241///
3242/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmadd_ps&expand=2555)
3243#[inline]
3244#[target_feature(enable = "avx512f,avx512vl")]
3245#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3246#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
pub unsafe fn _mm256_mask3_fmadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
    let fmadd: f32x8 = _mm256_fmadd_ps(a, b, c).as_f32x8();
    transmute(simd_select_bitmask(k, fmadd, c.as_f32x8()))
}
3251
3252/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3253///
3254/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmadd_ps&expand=2550)
3255#[inline]
3256#[target_feature(enable = "avx512f,avx512vl")]
3257#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3258#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
pub unsafe fn _mm_mask_fmadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
    let fmadd: f32x4 = _mm_fmadd_ps(a, b, c).as_f32x4();
    transmute(simd_select_bitmask(k, fmadd, a.as_f32x4()))
}
3263
3264/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3265///
3266/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmadd_ps&expand=2552)
3267#[inline]
3268#[target_feature(enable = "avx512f,avx512vl")]
3269#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3270#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
pub unsafe fn _mm_maskz_fmadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
    let fmadd: f32x4 = _mm_fmadd_ps(a, b, c).as_f32x4();
    let zero: f32x4 = _mm_setzero_ps().as_f32x4();
    transmute(simd_select_bitmask(k, fmadd, zero))
}
3276
3277/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3278///
3279/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmadd_ps&expand=2551)
3280#[inline]
3281#[target_feature(enable = "avx512f,avx512vl")]
3282#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3283#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
pub unsafe fn _mm_mask3_fmadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
    let fmadd: f32x4 = _mm_fmadd_ps(a, b, c).as_f32x4();
    transmute(simd_select_bitmask(k, fmadd, c.as_f32x4()))
}
3288
3289/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.
3290///
3291/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_pd&expand=2545)
3292#[inline]
3293#[target_feature(enable = "avx512f")]
3294#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3295#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
pub unsafe fn _mm512_fmadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    transmute(vfmadd132pd(a.as_f64x8(), b.as_f64x8(), c.as_f64x8()))
}
3299
3300/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3301///
3302/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_pd&expand=2546)
3303#[inline]
3304#[target_feature(enable = "avx512f")]
3305#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3306#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
pub unsafe fn _mm512_mask_fmadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
    let fmadd: f64x8 = _mm512_fmadd_pd(a, b, c).as_f64x8();
    transmute(simd_select_bitmask(k, fmadd, a.as_f64x8()))
}
3311
3312/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3313///
3314/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_pd&expand=2548)
3315#[inline]
3316#[target_feature(enable = "avx512f")]
3317#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3318#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
pub unsafe fn _mm512_maskz_fmadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    let fmadd: f64x8 = _mm512_fmadd_pd(a, b, c).as_f64x8();
    let zero: f64x8 = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, fmadd, zero))
}
3324
3325/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3326///
3327/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_pd&expand=2547)
3328#[inline]
3329#[target_feature(enable = "avx512f")]
3330#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3331#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
pub unsafe fn _mm512_mask3_fmadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
    let fmadd: f64x8 = _mm512_fmadd_pd(a, b, c).as_f64x8();
    transmute(simd_select_bitmask(k, fmadd, c.as_f64x8()))
}
3336
3337/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3338///
3339/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmadd_pd&expand=2542)
3340#[inline]
3341#[target_feature(enable = "avx512f,avx512vl")]
3342#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3343#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
pub unsafe fn _mm256_mask_fmadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
    let fmadd: f64x4 = _mm256_fmadd_pd(a, b, c).as_f64x4();
    transmute(simd_select_bitmask(k, fmadd, a.as_f64x4()))
}
3348
3349/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3350///
3351/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmadd_pd&expand=2544)
3352#[inline]
3353#[target_feature(enable = "avx512f,avx512vl")]
3354#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3355#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
pub unsafe fn _mm256_maskz_fmadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
    let fmadd: f64x4 = _mm256_fmadd_pd(a, b, c).as_f64x4();
    let zero: f64x4 = _mm256_setzero_pd().as_f64x4();
    transmute(simd_select_bitmask(k, fmadd, zero))
}
3361
3362/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3363///
3364/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmadd_pd&expand=2543)
3365#[inline]
3366#[target_feature(enable = "avx512f,avx512vl")]
3367#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3368#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
pub unsafe fn _mm256_mask3_fmadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
    let fmadd: f64x4 = _mm256_fmadd_pd(a, b, c).as_f64x4();
    transmute(simd_select_bitmask(k, fmadd, c.as_f64x4()))
}
3373
3374/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3375///
3376/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmadd_pd&expand=2538)
3377#[inline]
3378#[target_feature(enable = "avx512f,avx512vl")]
3379#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3380#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
pub unsafe fn _mm_mask_fmadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
    let fmadd: f64x2 = _mm_fmadd_pd(a, b, c).as_f64x2();
    transmute(simd_select_bitmask(k, fmadd, a.as_f64x2()))
}
3385
3386/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3387///
3388/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmadd_pd&expand=2540)
3389#[inline]
3390#[target_feature(enable = "avx512f,avx512vl")]
3391#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3392#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
pub unsafe fn _mm_maskz_fmadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
    let fmadd: f64x2 = _mm_fmadd_pd(a, b, c).as_f64x2();
    let zero: f64x2 = _mm_setzero_pd().as_f64x2();
    transmute(simd_select_bitmask(k, fmadd, zero))
}
3398
3399/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3400///
3401/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmadd_pd&expand=2539)
3402#[inline]
3403#[target_feature(enable = "avx512f,avx512vl")]
3404#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3405#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
pub unsafe fn _mm_mask3_fmadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
    let fmadd: f64x2 = _mm_fmadd_pd(a, b, c).as_f64x2();
    transmute(simd_select_bitmask(k, fmadd, c.as_f64x2()))
}
3410
3411/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.
3412///
3413/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_ps&expand=2643)
3414#[inline]
3415#[target_feature(enable = "avx512f")]
3416#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3417#[cfg_attr(test, assert_instr(vfmadd))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
pub unsafe fn _mm512_fmsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
    let zero: f32x16 = mem::zeroed();
    let sub: f32x16 = simd_sub(zero, c.as_f32x16());
    transmute(vfmadd132ps(a.as_f32x16(), b.as_f32x16(), sub))
}
3423
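// Editor's sketch (addition, not upstream code): as the body above shows, fmsub is
// expressed here as fmadd with a negated `c` (a * b - c == a * b + (-c)); the fused
// operation still rounds only once. Assumes an AVX-512F capable CPU; `cfg(test)` gated.
#[cfg(test)]
#[allow(dead_code)]
mod fmsub_ps_sketch {
    use super::*;

    #[target_feature(enable = "avx512f")]
    unsafe fn demo() {
        let a = _mm512_set1_ps(2.0);
        let b = _mm512_set1_ps(3.0);
        let c = _mm512_set1_ps(1.0);
        // Every lane holds 2.0 * 3.0 - 1.0 = 5.0.
        let _r = _mm512_fmsub_ps(a, b, c);
    }
}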
3424/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3425///
3426/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_ps&expand=2644)
3427#[inline]
3428#[target_feature(enable = "avx512f")]
3429#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3430#[cfg_attr(test, assert_instr(vfmadd))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
pub unsafe fn _mm512_mask_fmsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
    let fmsub: f32x16 = _mm512_fmsub_ps(a, b, c).as_f32x16();
    transmute(simd_select_bitmask(k, fmsub, a.as_f32x16()))
}
3435
3436/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3437///
3438/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_ps&expand=2646)
3439#[inline]
3440#[target_feature(enable = "avx512f")]
3441#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3442#[cfg_attr(test, assert_instr(vfmadd))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
pub unsafe fn _mm512_maskz_fmsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
    let fmsub: f32x16 = _mm512_fmsub_ps(a, b, c).as_f32x16();
    let zero: f32x16 = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, fmsub, zero))
}
3448
3449/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3450///
3451/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_ps&expand=2645)
3452#[inline]
3453#[target_feature(enable = "avx512f")]
3454#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3455#[cfg_attr(test, assert_instr(vfmadd))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
pub unsafe fn _mm512_mask3_fmsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
    let fmsub: f32x16 = _mm512_fmsub_ps(a, b, c).as_f32x16();
    transmute(simd_select_bitmask(k, fmsub, c.as_f32x16()))
}
3460
3461/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3462///
3463/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsub_ps&expand=2640)
3464#[inline]
3465#[target_feature(enable = "avx512f,avx512vl")]
3466#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3467#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
pub unsafe fn _mm256_mask_fmsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
    let fmsub: f32x8 = _mm256_fmsub_ps(a, b, c).as_f32x8();
    transmute(simd_select_bitmask(k, fmsub, a.as_f32x8()))
}
3472
3473/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3474///
3475/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsub_ps&expand=2642)
3476#[inline]
3477#[target_feature(enable = "avx512f,avx512vl")]
3478#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3479#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
pub unsafe fn _mm256_maskz_fmsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
    let fmsub: f32x8 = _mm256_fmsub_ps(a, b, c).as_f32x8();
    let zero: f32x8 = _mm256_setzero_ps().as_f32x8();
    transmute(simd_select_bitmask(k, fmsub, zero))
}
3485
3486/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3487///
3488/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsub_ps&expand=2641)
3489#[inline]
3490#[target_feature(enable = "avx512f,avx512vl")]
3491#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3492#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
pub unsafe fn _mm256_mask3_fmsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
    let fmsub: f32x8 = _mm256_fmsub_ps(a, b, c).as_f32x8();
    transmute(simd_select_bitmask(k, fmsub, c.as_f32x8()))
}
3497
3498/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3499///
3500/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsub_ps&expand=2636)
3501#[inline]
3502#[target_feature(enable = "avx512f,avx512vl")]
3503#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3504#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
pub unsafe fn _mm_mask_fmsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
    let fmsub: f32x4 = _mm_fmsub_ps(a, b, c).as_f32x4();
    transmute(simd_select_bitmask(k, fmsub, a.as_f32x4()))
}
3509
3510/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3511///
3512/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsub_ps&expand=2638)
3513#[inline]
3514#[target_feature(enable = "avx512f,avx512vl")]
3515#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3516#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
pub unsafe fn _mm_maskz_fmsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
    let fmsub: f32x4 = _mm_fmsub_ps(a, b, c).as_f32x4();
    let zero: f32x4 = _mm_setzero_ps().as_f32x4();
    transmute(simd_select_bitmask(k, fmsub, zero))
}
3522
3523/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3524///
3525/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsub_ps&expand=2637)
3526#[inline]
3527#[target_feature(enable = "avx512f,avx512vl")]
3528#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3529#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
pub unsafe fn _mm_mask3_fmsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
    let fmsub: f32x4 = _mm_fmsub_ps(a, b, c).as_f32x4();
    transmute(simd_select_bitmask(k, fmsub, c.as_f32x4()))
}
3534
3535/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.
3536///
3537/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_pd&expand=2631)
3538#[inline]
3539#[target_feature(enable = "avx512f")]
3540#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3541#[cfg_attr(test, assert_instr(vfmadd))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
pub unsafe fn _mm512_fmsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    let zero: f64x8 = mem::zeroed();
    let sub: f64x8 = simd_sub(zero, c.as_f64x8());
    transmute(vfmadd132pd(a.as_f64x8(), b.as_f64x8(), sub))
}
3547
3548/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3549///
3550/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_pd&expand=2632)
3551#[inline]
3552#[target_feature(enable = "avx512f")]
3553#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3554#[cfg_attr(test, assert_instr(vfmadd))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
pub unsafe fn _mm512_mask_fmsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
    let fmsub: f64x8 = _mm512_fmsub_pd(a, b, c).as_f64x8();
    transmute(simd_select_bitmask(k, fmsub, a.as_f64x8()))
}
3559
3560/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3561///
3562/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_pd&expand=2634)
3563#[inline]
3564#[target_feature(enable = "avx512f")]
3565#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3566#[cfg_attr(test, assert_instr(vfmadd))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
pub unsafe fn _mm512_maskz_fmsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    let fmsub: f64x8 = _mm512_fmsub_pd(a, b, c).as_f64x8();
    let zero: f64x8 = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, fmsub, zero))
}
3572
3573/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3574///
3575/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_pd&expand=2633)
3576#[inline]
3577#[target_feature(enable = "avx512f")]
3578#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3579#[cfg_attr(test, assert_instr(vfmadd))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
pub unsafe fn _mm512_mask3_fmsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
    let fmsub: f64x8 = _mm512_fmsub_pd(a, b, c).as_f64x8();
    transmute(simd_select_bitmask(k, fmsub, c.as_f64x8()))
}
3584
3585/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3586///
3587/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsub_pd&expand=2628)
3588#[inline]
3589#[target_feature(enable = "avx512f,avx512vl")]
3590#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3591#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
pub unsafe fn _mm256_mask_fmsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
    let fmsub: f64x4 = _mm256_fmsub_pd(a, b, c).as_f64x4();
    transmute(simd_select_bitmask(k, fmsub, a.as_f64x4()))
}
3596
3597/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3598///
3599/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsub_pd&expand=2630)
3600#[inline]
3601#[target_feature(enable = "avx512f,avx512vl")]
3602#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3603#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
pub unsafe fn _mm256_maskz_fmsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
    let fmsub: f64x4 = _mm256_fmsub_pd(a, b, c).as_f64x4();
    let zero: f64x4 = _mm256_setzero_pd().as_f64x4();
    transmute(simd_select_bitmask(k, fmsub, zero))
}
3609
3610/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3611///
3612/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsub_pd&expand=2629)
3613#[inline]
3614#[target_feature(enable = "avx512f,avx512vl")]
3615#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3616#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
pub unsafe fn _mm256_mask3_fmsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
    let fmsub: f64x4 = _mm256_fmsub_pd(a, b, c).as_f64x4();
    transmute(simd_select_bitmask(k, fmsub, c.as_f64x4()))
}
3621
3622/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3623///
3624/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsub_pd&expand=2624)
3625#[inline]
3626#[target_feature(enable = "avx512f,avx512vl")]
3627#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3628#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
pub unsafe fn _mm_mask_fmsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
    let fmsub: f64x2 = _mm_fmsub_pd(a, b, c).as_f64x2();
    transmute(simd_select_bitmask(k, fmsub, a.as_f64x2()))
}
3633
3634/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3635///
3636/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsub_pd&expand=2626)
3637#[inline]
3638#[target_feature(enable = "avx512f,avx512vl")]
3639#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3640#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
pub unsafe fn _mm_maskz_fmsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
    let fmsub: f64x2 = _mm_fmsub_pd(a, b, c).as_f64x2();
    let zero: f64x2 = _mm_setzero_pd().as_f64x2();
    transmute(simd_select_bitmask(k, fmsub, zero))
}
3646
3647/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3648///
3649/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsub_pd&expand=2625)
3650#[inline]
3651#[target_feature(enable = "avx512f,avx512vl")]
3652#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3653#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
pub unsafe fn _mm_mask3_fmsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
    let fmsub: f64x2 = _mm_fmsub_pd(a, b, c).as_f64x2();
    transmute(simd_select_bitmask(k, fmsub, c.as_f64x2()))
}
3658
3659/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.
3660///
3661/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_ps&expand=2611)
3662#[inline]
3663#[target_feature(enable = "avx512f")]
3664#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3665#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
pub unsafe fn _mm512_fmaddsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
    transmute(vfmaddsub213ps(
        a.as_f32x16(),
        b.as_f32x16(),
        c.as_f32x16(),
        _MM_FROUND_CUR_DIRECTION,
    ))
}
3674
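// A minimal sketch of the fmaddsub family above (illustrative addition, not upstream
// code): even-indexed lanes compute a * b - c while odd-indexed lanes compute a * b + c.
// Assumes an AVX-512F capable CPU; gated behind `cfg(test)`.
#[cfg(test)]
#[allow(dead_code)]
mod fmaddsub_ps_sketch {
    use super::*;

    #[target_feature(enable = "avx512f")]
    unsafe fn demo() {
        let a = _mm512_set1_ps(2.0);
        let b = _mm512_set1_ps(3.0);
        let c = _mm512_set1_ps(1.0);
        // Even lanes: 2.0 * 3.0 - 1.0 = 5.0; odd lanes: 2.0 * 3.0 + 1.0 = 7.0.
        let _r = _mm512_fmaddsub_ps(a, b, c);
    }
}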
3675/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3676///
3677/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_ps&expand=2612)
3678#[inline]
3679#[target_feature(enable = "avx512f")]
3680#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3681#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
pub unsafe fn _mm512_mask_fmaddsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
    let fmaddsub: f32x16 = _mm512_fmaddsub_ps(a, b, c).as_f32x16();
    transmute(simd_select_bitmask(k, fmaddsub, a.as_f32x16()))
}
3686
3687/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3688///
3689/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_ps&expand=2614)
3690#[inline]
3691#[target_feature(enable = "avx512f")]
3692#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3693#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
pub unsafe fn _mm512_maskz_fmaddsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
    let fmaddsub: f32x16 = _mm512_fmaddsub_ps(a, b, c).as_f32x16();
    let zero: f32x16 = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, fmaddsub, zero))
}
3699
3700/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3701///
3702/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_ps&expand=2613)
3703#[inline]
3704#[target_feature(enable = "avx512f")]
3705#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3706#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
pub unsafe fn _mm512_mask3_fmaddsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
    let fmaddsub: f32x16 = _mm512_fmaddsub_ps(a, b, c).as_f32x16();
    transmute(simd_select_bitmask(k, fmaddsub, c.as_f32x16()))
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmaddsub_ps&expand=2608)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
pub unsafe fn _mm256_mask_fmaddsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
    let fmaddsub: f32x8 = _mm256_fmaddsub_ps(a, b, c).as_f32x8();
    transmute(simd_select_bitmask(k, fmaddsub, a.as_f32x8()))
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmaddsub_ps&expand=2610)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
pub unsafe fn _mm256_maskz_fmaddsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
    let fmaddsub: f32x8 = _mm256_fmaddsub_ps(a, b, c).as_f32x8();
    let zero: f32x8 = _mm256_setzero_ps().as_f32x8();
    transmute(simd_select_bitmask(k, fmaddsub, zero))
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmaddsub_ps&expand=2609)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
pub unsafe fn _mm256_mask3_fmaddsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
    let fmaddsub: f32x8 = _mm256_fmaddsub_ps(a, b, c).as_f32x8();
    transmute(simd_select_bitmask(k, fmaddsub, c.as_f32x8()))
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmaddsub_ps&expand=2604)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
pub unsafe fn _mm_mask_fmaddsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
    let fmaddsub: f32x4 = _mm_fmaddsub_ps(a, b, c).as_f32x4();
    transmute(simd_select_bitmask(k, fmaddsub, a.as_f32x4()))
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmaddsub_ps&expand=2606)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
pub unsafe fn _mm_maskz_fmaddsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
    let fmaddsub: f32x4 = _mm_fmaddsub_ps(a, b, c).as_f32x4();
    let zero: f32x4 = _mm_setzero_ps().as_f32x4();
    transmute(simd_select_bitmask(k, fmaddsub, zero))
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmaddsub_ps&expand=2605)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
pub unsafe fn _mm_mask3_fmaddsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
    let fmaddsub: f32x4 = _mm_fmaddsub_ps(a, b, c).as_f32x4();
    transmute(simd_select_bitmask(k, fmaddsub, c.as_f32x4()))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_pd&expand=2599)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
pub unsafe fn _mm512_fmaddsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    transmute(vfmaddsub213pd(
        a.as_f64x8(),
        b.as_f64x8(),
        c.as_f64x8(),
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_pd&expand=2600)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
pub unsafe fn _mm512_mask_fmaddsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
    let fmaddsub: f64x8 = _mm512_fmaddsub_pd(a, b, c).as_f64x8();
    transmute(simd_select_bitmask(k, fmaddsub, a.as_f64x8()))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_pd&expand=2602)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
pub unsafe fn _mm512_maskz_fmaddsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    let fmaddsub: f64x8 = _mm512_fmaddsub_pd(a, b, c).as_f64x8();
    let zero: f64x8 = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, fmaddsub, zero))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_pd&expand=2601)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
pub unsafe fn _mm512_mask3_fmaddsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
    let fmaddsub: f64x8 = _mm512_fmaddsub_pd(a, b, c).as_f64x8();
    transmute(simd_select_bitmask(k, fmaddsub, c.as_f64x8()))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmaddsub_pd&expand=2596)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
pub unsafe fn _mm256_mask_fmaddsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
    let fmaddsub: f64x4 = _mm256_fmaddsub_pd(a, b, c).as_f64x4();
    transmute(simd_select_bitmask(k, fmaddsub, a.as_f64x4()))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmaddsub_pd&expand=2598)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
pub unsafe fn _mm256_maskz_fmaddsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
    let fmaddsub: f64x4 = _mm256_fmaddsub_pd(a, b, c).as_f64x4();
    let zero: f64x4 = _mm256_setzero_pd().as_f64x4();
    transmute(simd_select_bitmask(k, fmaddsub, zero))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmaddsub_pd&expand=2597)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
pub unsafe fn _mm256_mask3_fmaddsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
    let fmaddsub: f64x4 = _mm256_fmaddsub_pd(a, b, c).as_f64x4();
    transmute(simd_select_bitmask(k, fmaddsub, c.as_f64x4()))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmaddsub_pd&expand=2592)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
pub unsafe fn _mm_mask_fmaddsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
    let fmaddsub: f64x2 = _mm_fmaddsub_pd(a, b, c).as_f64x2();
    transmute(simd_select_bitmask(k, fmaddsub, a.as_f64x2()))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmaddsub_pd&expand=2594)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
pub unsafe fn _mm_maskz_fmaddsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
    let fmaddsub: f64x2 = _mm_fmaddsub_pd(a, b, c).as_f64x2();
    let zero: f64x2 = _mm_setzero_pd().as_f64x2();
    transmute(simd_select_bitmask(k, fmaddsub, zero))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmaddsub_pd&expand=2593)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
pub unsafe fn _mm_mask3_fmaddsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
    let fmaddsub: f64x2 = _mm_fmaddsub_pd(a, b, c).as_f64x2();
    transmute(simd_select_bitmask(k, fmaddsub, c.as_f64x2()))
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_ps&expand=2691)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
pub unsafe fn _mm512_fmsubadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
    let zero: f32x16 = mem::zeroed();
    let sub: f32x16 = simd_sub(zero, c.as_f32x16());
    transmute(vfmaddsub213ps(
        a.as_f32x16(),
        b.as_f32x16(),
        sub,
        _MM_FROUND_CUR_DIRECTION,
    ))
}
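
// Why negating `c` is enough: `vfmaddsub` subtracts `c` on even lanes and adds it
// on odd lanes, so feeding it `-c` flips both signs and yields exactly the
// fmsubadd pattern (add on even lanes, subtract on odd lanes). A scalar sketch of
// the identity (illustrative only; the helper name, the 4-lane width, and the
// `#[cfg(test)]` placement are assumptions):
#[cfg(test)]
#[allow(dead_code)]
fn fmsubadd_via_fmaddsub_model(a: &[f32; 4], b: &[f32; 4], c: &[f32; 4]) -> [f32; 4] {
    let mut dst = [0.0f32; 4];
    for i in 0..4 {
        let neg_c = -c[i];
        // fmaddsub applied to -c: even lanes a*b - (-c) = a*b + c, odd lanes a*b + (-c) = a*b - c.
        dst[i] = if i % 2 == 0 {
            a[i] * b[i] - neg_c
        } else {
            a[i] * b[i] + neg_c
        };
    }
    dst
}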

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_ps&expand=2692)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
pub unsafe fn _mm512_mask_fmsubadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
    let fmsubadd: f32x16 = _mm512_fmsubadd_ps(a, b, c).as_f32x16();
    transmute(simd_select_bitmask(k, fmsubadd, a.as_f32x16()))
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_ps&expand=2694)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
pub unsafe fn _mm512_maskz_fmsubadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
    let fmsubadd: f32x16 = _mm512_fmsubadd_ps(a, b, c).as_f32x16();
    let zero: f32x16 = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, fmsubadd, zero))
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_ps&expand=2693)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
pub unsafe fn _mm512_mask3_fmsubadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
    let fmsubadd: f32x16 = _mm512_fmsubadd_ps(a, b, c).as_f32x16();
    transmute(simd_select_bitmask(k, fmsubadd, c.as_f32x16()))
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsubadd_ps&expand=2688)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
pub unsafe fn _mm256_mask_fmsubadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
    let fmsubadd: f32x8 = _mm256_fmsubadd_ps(a, b, c).as_f32x8();
    transmute(simd_select_bitmask(k, fmsubadd, a.as_f32x8()))
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsubadd_ps&expand=2690)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
pub unsafe fn _mm256_maskz_fmsubadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
    let fmsubadd: f32x8 = _mm256_fmsubadd_ps(a, b, c).as_f32x8();
    let zero: f32x8 = _mm256_setzero_ps().as_f32x8();
    transmute(simd_select_bitmask(k, fmsubadd, zero))
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsubadd_ps&expand=2689)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
pub unsafe fn _mm256_mask3_fmsubadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
    let fmsubadd: f32x8 = _mm256_fmsubadd_ps(a, b, c).as_f32x8();
    transmute(simd_select_bitmask(k, fmsubadd, c.as_f32x8()))
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsubadd_ps&expand=2684)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
pub unsafe fn _mm_mask_fmsubadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
    let fmsubadd: f32x4 = _mm_fmsubadd_ps(a, b, c).as_f32x4();
    transmute(simd_select_bitmask(k, fmsubadd, a.as_f32x4()))
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsubadd_ps&expand=2686)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
pub unsafe fn _mm_maskz_fmsubadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
    let fmsubadd: f32x4 = _mm_fmsubadd_ps(a, b, c).as_f32x4();
    let zero: f32x4 = _mm_setzero_ps().as_f32x4();
    transmute(simd_select_bitmask(k, fmsubadd, zero))
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsubadd_ps&expand=2685)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
pub unsafe fn _mm_mask3_fmsubadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
    let fmsubadd: f32x4 = _mm_fmsubadd_ps(a, b, c).as_f32x4();
    transmute(simd_select_bitmask(k, fmsubadd, c.as_f32x4()))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_pd&expand=2679)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
pub unsafe fn _mm512_fmsubadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    let zero: f64x8 = mem::zeroed();
    let sub: f64x8 = simd_sub(zero, c.as_f64x8());
    transmute(vfmaddsub213pd(
        a.as_f64x8(),
        b.as_f64x8(),
        sub,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_pd&expand=2680)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
pub unsafe fn _mm512_mask_fmsubadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
    let fmsubadd: f64x8 = _mm512_fmsubadd_pd(a, b, c).as_f64x8();
    transmute(simd_select_bitmask(k, fmsubadd, a.as_f64x8()))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_pd&expand=2682)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
pub unsafe fn _mm512_maskz_fmsubadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    let fmsubadd: f64x8 = _mm512_fmsubadd_pd(a, b, c).as_f64x8();
    let zero: f64x8 = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, fmsubadd, zero))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_pd&expand=2681)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
pub unsafe fn _mm512_mask3_fmsubadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
    let fmsubadd: f64x8 = _mm512_fmsubadd_pd(a, b, c).as_f64x8();
    transmute(simd_select_bitmask(k, fmsubadd, c.as_f64x8()))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsubadd_pd&expand=2676)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
pub unsafe fn _mm256_mask_fmsubadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
    let fmsubadd: f64x4 = _mm256_fmsubadd_pd(a, b, c).as_f64x4();
    transmute(simd_select_bitmask(k, fmsubadd, a.as_f64x4()))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsubadd_pd&expand=2678)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
pub unsafe fn _mm256_maskz_fmsubadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
    let fmsubadd: f64x4 = _mm256_fmsubadd_pd(a, b, c).as_f64x4();
    let zero: f64x4 = _mm256_setzero_pd().as_f64x4();
    transmute(simd_select_bitmask(k, fmsubadd, zero))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsubadd_pd&expand=2677)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
pub unsafe fn _mm256_mask3_fmsubadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
    let fmsubadd: f64x4 = _mm256_fmsubadd_pd(a, b, c).as_f64x4();
    transmute(simd_select_bitmask(k, fmsubadd, c.as_f64x4()))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsubadd_pd&expand=2672)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
pub unsafe fn _mm_mask_fmsubadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
    let fmsubadd: f64x2 = _mm_fmsubadd_pd(a, b, c).as_f64x2();
    transmute(simd_select_bitmask(k, fmsubadd, a.as_f64x2()))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsubadd_pd&expand=2674)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
pub unsafe fn _mm_maskz_fmsubadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
    let fmsubadd: f64x2 = _mm_fmsubadd_pd(a, b, c).as_f64x2();
    let zero: f64x2 = _mm_setzero_pd().as_f64x2();
    transmute(simd_select_bitmask(k, fmsubadd, zero))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsubadd_pd&expand=2673)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
pub unsafe fn _mm_mask3_fmsubadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
    let fmsubadd: f64x2 = _mm_fmsubadd_pd(a, b, c).as_f64x2();
    transmute(simd_select_bitmask(k, fmsubadd, c.as_f64x2()))
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_ps&expand=2723)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
pub unsafe fn _mm512_fnmadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
    let zero: f32x16 = mem::zeroed();
    let sub: f32x16 = simd_sub(zero, a.as_f32x16());
    transmute(vfmadd132ps(sub, b.as_f32x16(), c.as_f32x16()))
}
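
// `fnmadd` computes `-(a * b) + c`. Negating one multiplicand and reusing the
// ordinary fused multiply-add gives the same value, since
// `(-a) * b + c == -(a * b) + c`; that is what the `simd_sub(zero, a)` above
// prepares. A scalar sketch (illustrative only; the helper name and
// `#[cfg(test)]` placement are assumptions, and unlike the intrinsic this
// non-fused form rounds twice):
#[cfg(test)]
#[allow(dead_code)]
fn fnmadd_scalar_model(a: f32, b: f32, c: f32) -> f32 {
    (-a) * b + c
}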

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_ps&expand=2724)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
pub unsafe fn _mm512_mask_fnmadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
    let fnmadd: f32x16 = _mm512_fnmadd_ps(a, b, c).as_f32x16();
    transmute(simd_select_bitmask(k, fnmadd, a.as_f32x16()))
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_ps&expand=2726)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
pub unsafe fn _mm512_maskz_fnmadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
    let fnmadd: f32x16 = _mm512_fnmadd_ps(a, b, c).as_f32x16();
    let zero: f32x16 = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, fnmadd, zero))
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_ps&expand=2725)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
pub unsafe fn _mm512_mask3_fnmadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
    let fnmadd: f32x16 = _mm512_fnmadd_ps(a, b, c).as_f32x16();
    transmute(simd_select_bitmask(k, fnmadd, c.as_f32x16()))
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmadd_ps&expand=2720)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
pub unsafe fn _mm256_mask_fnmadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
    let fnmadd: f32x8 = _mm256_fnmadd_ps(a, b, c).as_f32x8();
    transmute(simd_select_bitmask(k, fnmadd, a.as_f32x8()))
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmadd_ps&expand=2722)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
pub unsafe fn _mm256_maskz_fnmadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
    let fnmadd: f32x8 = _mm256_fnmadd_ps(a, b, c).as_f32x8();
    let zero: f32x8 = _mm256_setzero_ps().as_f32x8();
    transmute(simd_select_bitmask(k, fnmadd, zero))
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmadd_ps&expand=2721)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
pub unsafe fn _mm256_mask3_fnmadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
    let fnmadd: f32x8 = _mm256_fnmadd_ps(a, b, c).as_f32x8();
    transmute(simd_select_bitmask(k, fnmadd, c.as_f32x8()))
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmadd_ps&expand=2716)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
pub unsafe fn _mm_mask_fnmadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
    let fnmadd: f32x4 = _mm_fnmadd_ps(a, b, c).as_f32x4();
    transmute(simd_select_bitmask(k, fnmadd, a.as_f32x4()))
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmadd_ps&expand=2718)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
pub unsafe fn _mm_maskz_fnmadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
    let fnmadd: f32x4 = _mm_fnmadd_ps(a, b, c).as_f32x4();
    let zero: f32x4 = _mm_setzero_ps().as_f32x4();
    transmute(simd_select_bitmask(k, fnmadd, zero))
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmadd_ps&expand=2717)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
pub unsafe fn _mm_mask3_fnmadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
    let fnmadd: f32x4 = _mm_fnmadd_ps(a, b, c).as_f32x4();
    transmute(simd_select_bitmask(k, fnmadd, c.as_f32x4()))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_pd&expand=2711)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
pub unsafe fn _mm512_fnmadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    let zero: f64x8 = mem::zeroed();
    let sub: f64x8 = simd_sub(zero, a.as_f64x8());
    transmute(vfmadd132pd(sub, b.as_f64x8(), c.as_f64x8()))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_pd&expand=2712)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
pub unsafe fn _mm512_mask_fnmadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
    let fnmadd: f64x8 = _mm512_fnmadd_pd(a, b, c).as_f64x8();
    transmute(simd_select_bitmask(k, fnmadd, a.as_f64x8()))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_pd&expand=2714)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
pub unsafe fn _mm512_maskz_fnmadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    let fnmadd: f64x8 = _mm512_fnmadd_pd(a, b, c).as_f64x8();
    let zero: f64x8 = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, fnmadd, zero))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_pd&expand=2713)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
pub unsafe fn _mm512_mask3_fnmadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
    let fnmadd: f64x8 = _mm512_fnmadd_pd(a, b, c).as_f64x8();
    transmute(simd_select_bitmask(k, fnmadd, c.as_f64x8()))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmadd_pd&expand=2708)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
pub unsafe fn _mm256_mask_fnmadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
    let fnmadd: f64x4 = _mm256_fnmadd_pd(a, b, c).as_f64x4();
    transmute(simd_select_bitmask(k, fnmadd, a.as_f64x4()))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmadd_pd&expand=2710)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
pub unsafe fn _mm256_maskz_fnmadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
    let fnmadd: f64x4 = _mm256_fnmadd_pd(a, b, c).as_f64x4();
    let zero: f64x4 = _mm256_setzero_pd().as_f64x4();
    transmute(simd_select_bitmask(k, fnmadd, zero))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmadd_pd&expand=2709)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
pub unsafe fn _mm256_mask3_fnmadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
    let fnmadd: f64x4 = _mm256_fnmadd_pd(a, b, c).as_f64x4();
    transmute(simd_select_bitmask(k, fnmadd, c.as_f64x4()))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmadd_pd&expand=2704)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
pub unsafe fn _mm_mask_fnmadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
    let fnmadd: f64x2 = _mm_fnmadd_pd(a, b, c).as_f64x2();
    transmute(simd_select_bitmask(k, fnmadd, a.as_f64x2()))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmadd_pd&expand=2706)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
pub unsafe fn _mm_maskz_fnmadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
    let fnmadd: f64x2 = _mm_fnmadd_pd(a, b, c).as_f64x2();
    let zero: f64x2 = _mm_setzero_pd().as_f64x2();
    transmute(simd_select_bitmask(k, fnmadd, zero))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmadd_pd&expand=2705)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
pub unsafe fn _mm_mask3_fnmadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
    let fnmadd: f64x2 = _mm_fnmadd_pd(a, b, c).as_f64x2();
    transmute(simd_select_bitmask(k, fnmadd, c.as_f64x2()))
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_ps&expand=2771)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
pub unsafe fn _mm512_fnmsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
    let zero: f32x16 = mem::zeroed();
    let suba: f32x16 = simd_sub(zero, a.as_f32x16());
    let subc: f32x16 = simd_sub(zero, c.as_f32x16());
    transmute(vfmadd132ps(suba, b.as_f32x16(), subc))
}
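
// `fnmsub` computes `-(a * b) - c`. Negating both `a` and `c` reduces it to the
// ordinary fused multiply-add, since `(-a) * b + (-c) == -(a * b) - c`, which is
// why the body above subtracts both operands from zero. A scalar sketch
// (illustrative only; the helper name and `#[cfg(test)]` placement are
// assumptions, and unlike the intrinsic this non-fused form rounds twice):
#[cfg(test)]
#[allow(dead_code)]
fn fnmsub_scalar_model(a: f32, b: f32, c: f32) -> f32 {
    (-a) * b + (-c)
}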

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_ps&expand=2772)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
pub unsafe fn _mm512_mask_fnmsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
    let fnmsub: f32x16 = _mm512_fnmsub_ps(a, b, c).as_f32x16();
    transmute(simd_select_bitmask(k, fnmsub, a.as_f32x16()))
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_ps&expand=2774)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
pub unsafe fn _mm512_maskz_fnmsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
    let fnmsub: f32x16 = _mm512_fnmsub_ps(a, b, c).as_f32x16();
    let zero: f32x16 = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, fnmsub, zero))
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_ps&expand=2773)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
pub unsafe fn _mm512_mask3_fnmsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
    let fnmsub: f32x16 = _mm512_fnmsub_ps(a, b, c).as_f32x16();
    transmute(simd_select_bitmask(k, fnmsub, c.as_f32x16()))
}
4469
4470/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4471///
4472/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmsub_ps&expand=2768)
4473#[inline]
4474#[target_feature(enable = "avx512f,avx512vl")]
4475#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4476#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4477pub unsafe fn _mm256_mask_fnmsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
4478 let fnmsub: f32x8 = _mm256_fnmsub_ps(a, b, c).as_f32x8();
4479 transmute(src:simd_select_bitmask(m:k, yes:fnmsub, no:a.as_f32x8()))
4480}
4481
4482/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4483///
4484/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmsub_ps&expand=2770)
4485#[inline]
4486#[target_feature(enable = "avx512f,avx512vl")]
4487#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4488#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4489pub unsafe fn _mm256_maskz_fnmsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
4490 let fnmsub: f32x8 = _mm256_fnmsub_ps(a, b, c).as_f32x8();
4491 let zero: f32x8 = _mm256_setzero_ps().as_f32x8();
4492 transmute(src:simd_select_bitmask(m:k, yes:fnmsub, no:zero))
4493}
4494
4495/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4496///
4497/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmsub_ps&expand=2769)
4498#[inline]
4499#[target_feature(enable = "avx512f,avx512vl")]
4500#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4501#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4502pub unsafe fn _mm256_mask3_fnmsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
    let fnmsub: f32x8 = _mm256_fnmsub_ps(a, b, c).as_f32x8();
    transmute(simd_select_bitmask(k, fnmsub, c.as_f32x8()))
4505}
4506
4507/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4508///
4509/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmsub_ps&expand=2764)
4510#[inline]
4511#[target_feature(enable = "avx512f,avx512vl")]
4512#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4513#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4514pub unsafe fn _mm_mask_fnmsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
    let fnmsub: f32x4 = _mm_fnmsub_ps(a, b, c).as_f32x4();
    transmute(simd_select_bitmask(k, fnmsub, a.as_f32x4()))
4517}
4518
4519/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4520///
4521/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmsub_ps&expand=2766)
4522#[inline]
4523#[target_feature(enable = "avx512f,avx512vl")]
4524#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4525#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4526pub unsafe fn _mm_maskz_fnmsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
    let fnmsub: f32x4 = _mm_fnmsub_ps(a, b, c).as_f32x4();
    let zero: f32x4 = _mm_setzero_ps().as_f32x4();
    transmute(simd_select_bitmask(k, fnmsub, zero))
4530}
4531
4532/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4533///
4534/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmsub_ps&expand=2765)
4535#[inline]
4536#[target_feature(enable = "avx512f,avx512vl")]
4537#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4538#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4539pub unsafe fn _mm_mask3_fnmsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
    let fnmsub: f32x4 = _mm_fnmsub_ps(a, b, c).as_f32x4();
    transmute(simd_select_bitmask(k, fnmsub, c.as_f32x4()))
4542}
4543
4544/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.
4545///
4546/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_pd&expand=2759)
4547#[inline]
4548#[target_feature(enable = "avx512f")]
4549#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4550#[cfg_attr(test, assert_instr(vfmadd))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4551pub unsafe fn _mm512_fnmsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    let zero: f64x8 = mem::zeroed();
    let suba: f64x8 = simd_sub(zero, a.as_f64x8());
    let subc: f64x8 = simd_sub(zero, c.as_f64x8());
    transmute(vfmadd132pd(suba, b.as_f64x8(), subc))
4556}
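// Worked example (sketch; the values are assumptions chosen for illustration):
// `_mm512_fnmsub_pd` computes `-(a * b) - c` per lane, so with a = 1.5, b = 4.0,
// c = 0.5 every lane of the result is -(1.5 * 4.0) - 0.5 == -6.5.
//
//     let a = _mm512_set1_pd(1.5);
//     let b = _mm512_set1_pd(4.0);
//     let c = _mm512_set1_pd(0.5);
//     let r = _mm512_fnmsub_pd(a, b, c); // all eight lanes == -6.5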
4557
4558/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4559///
4560/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_pd&expand=2760)
4561#[inline]
4562#[target_feature(enable = "avx512f")]
4563#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4564#[cfg_attr(test, assert_instr(vfmadd))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4565pub unsafe fn _mm512_mask_fnmsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
    let fnmsub: f64x8 = _mm512_fnmsub_pd(a, b, c).as_f64x8();
    transmute(simd_select_bitmask(k, fnmsub, a.as_f64x8()))
4568}
4569
4570/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4571///
4572/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_pd&expand=2762)
4573#[inline]
4574#[target_feature(enable = "avx512f")]
4575#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4576#[cfg_attr(test, assert_instr(vfmadd))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4577pub unsafe fn _mm512_maskz_fnmsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    let fnmsub: f64x8 = _mm512_fnmsub_pd(a, b, c).as_f64x8();
    let zero: f64x8 = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, fnmsub, zero))
4581}
4582
4583/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4584///
4585/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_pd&expand=2761)
4586#[inline]
4587#[target_feature(enable = "avx512f")]
4588#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4589#[cfg_attr(test, assert_instr(vfmadd))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4590pub unsafe fn _mm512_mask3_fnmsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
    let fnmsub: f64x8 = _mm512_fnmsub_pd(a, b, c).as_f64x8();
    transmute(simd_select_bitmask(k, fnmsub, c.as_f64x8()))
4593}
4594
4595/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4596///
4597/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmsub_pd&expand=2756)
4598#[inline]
4599#[target_feature(enable = "avx512f,avx512vl")]
4600#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4601#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4602pub unsafe fn _mm256_mask_fnmsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
    let fnmsub: f64x4 = _mm256_fnmsub_pd(a, b, c).as_f64x4();
    transmute(simd_select_bitmask(k, fnmsub, a.as_f64x4()))
4605}
4606
4607/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4608///
4609/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmsub_pd&expand=2758)
4610#[inline]
4611#[target_feature(enable = "avx512f,avx512vl")]
4612#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4613#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4614pub unsafe fn _mm256_maskz_fnmsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
    let fnmsub: f64x4 = _mm256_fnmsub_pd(a, b, c).as_f64x4();
    let zero: f64x4 = _mm256_setzero_pd().as_f64x4();
    transmute(simd_select_bitmask(k, fnmsub, zero))
4618}
4619
4620/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4621///
4622/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmsub_pd&expand=2757)
4623#[inline]
4624#[target_feature(enable = "avx512f,avx512vl")]
4625#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4626#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4627pub unsafe fn _mm256_mask3_fnmsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
    let fnmsub: f64x4 = _mm256_fnmsub_pd(a, b, c).as_f64x4();
    transmute(simd_select_bitmask(k, fnmsub, c.as_f64x4()))
4630}
4631
4632/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4633///
4634/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmsub_pd&expand=2752)
4635#[inline]
4636#[target_feature(enable = "avx512f,avx512vl")]
4637#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4638#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4639pub unsafe fn _mm_mask_fnmsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
    let fnmsub: f64x2 = _mm_fnmsub_pd(a, b, c).as_f64x2();
    transmute(simd_select_bitmask(k, fnmsub, a.as_f64x2()))
4642}
4643
4644/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4645///
4646/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmsub_pd&expand=2754)
4647#[inline]
4648#[target_feature(enable = "avx512f,avx512vl")]
4649#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4650#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4651pub unsafe fn _mm_maskz_fnmsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
    let fnmsub: f64x2 = _mm_fnmsub_pd(a, b, c).as_f64x2();
    let zero: f64x2 = _mm_setzero_pd().as_f64x2();
    transmute(simd_select_bitmask(k, fnmsub, zero))
4655}
4656
4657/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4658///
4659/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmsub_pd&expand=2753)
4660#[inline]
4661#[target_feature(enable = "avx512f,avx512vl")]
4662#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4663#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4664pub unsafe fn _mm_mask3_fnmsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
    let fnmsub: f64x2 = _mm_fnmsub_pd(a, b, c).as_f64x2();
    transmute(simd_select_bitmask(k, fnmsub, c.as_f64x2()))
4667}
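// Note on mask width (illustrative sketch, values are assumptions): the 128-bit
// double-precision variants take a full `__mmask8`, but only the low two bits
// select lanes; the upper six bits are ignored.
//
//     let a = _mm_set_pd(8.0, 2.0); // lane 1 = 8.0, lane 0 = 2.0
//     let b = _mm_set1_pd(1.0);
//     let c = _mm_set1_pd(0.0);
//     // Only bit 0 is set, so lane 0 becomes -(2.0 * 1.0) - 0.0 == -2.0,
//     // while lane 1 is copied from `c` (the mask3 flavour), i.e. 0.0.
//     let r = _mm_mask3_fnmsub_pd(a, b, c, 0b0000_0001);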
4668
4669/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
4670///
4671/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rcp14_ps&expand=4502)
4672#[inline]
4673#[target_feature(enable = "avx512f")]
4674#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4675#[cfg_attr(test, assert_instr(vrcp14ps))]
4676pub unsafe fn _mm512_rcp14_ps(a: __m512) -> __m512 {
    transmute(vrcp14ps(
        a.as_f32x16(),
        _mm512_setzero_ps().as_f32x16(),
        0b11111111_11111111,
    ))
4682}
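// Usage sketch (assumed values, not from the upstream source): `_mm512_rcp14_ps`
// returns an approximate reciprocal with relative error below 2^-14, so it is best
// used as a fast seed for a Newton-Raphson refinement rather than an exact 1.0 / x.
//
//     let x = _mm512_set1_ps(4.0);
//     let approx = _mm512_rcp14_ps(x); // each lane ~= 0.25
//     // One Newton-Raphson step tightens the estimate: r' = r * (2 - x * r).
//     let two = _mm512_set1_ps(2.0);
//     let refined = _mm512_mul_ps(approx, _mm512_fnmadd_ps(x, approx, two));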
4683
4684/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4685///
4686/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rcp14_ps&expand=4500)
4687#[inline]
4688#[target_feature(enable = "avx512f")]
4689#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4690#[cfg_attr(test, assert_instr(vrcp14ps))]
4691pub unsafe fn _mm512_mask_rcp14_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    transmute(vrcp14ps(a.as_f32x16(), src.as_f32x16(), k))
4693}
4694
4695/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4696///
4697/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rcp14_ps&expand=4501)
4698#[inline]
4699#[target_feature(enable = "avx512f")]
4700#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4701#[cfg_attr(test, assert_instr(vrcp14ps))]
4702pub unsafe fn _mm512_maskz_rcp14_ps(k: __mmask16, a: __m512) -> __m512 {
    transmute(vrcp14ps(a.as_f32x16(), _mm512_setzero_ps().as_f32x16(), k))
4704}
4705
4706/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
4707///
4708/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rcp14_ps&expand=4499)
4709#[inline]
4710#[target_feature(enable = "avx512f,avx512vl")]
4711#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4712#[cfg_attr(test, assert_instr(vrcp14ps))]
4713pub unsafe fn _mm256_rcp14_ps(a: __m256) -> __m256 {
    transmute(vrcp14ps256(
        a.as_f32x8(),
        _mm256_setzero_ps().as_f32x8(),
        0b11111111,
    ))
4719}
4720
4721/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4722///
4723/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rcp14_ps&expand=4497)
4724#[inline]
4725#[target_feature(enable = "avx512f,avx512vl")]
4726#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4727#[cfg_attr(test, assert_instr(vrcp14ps))]
4728pub unsafe fn _mm256_mask_rcp14_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
    transmute(vrcp14ps256(a.as_f32x8(), src.as_f32x8(), k))
4730}
4731
4732/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4733///
4734/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rcp14_ps&expand=4498)
4735#[inline]
4736#[target_feature(enable = "avx512f,avx512vl")]
4737#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4738#[cfg_attr(test, assert_instr(vrcp14ps))]
4739pub unsafe fn _mm256_maskz_rcp14_ps(k: __mmask8, a: __m256) -> __m256 {
    transmute(vrcp14ps256(a.as_f32x8(), _mm256_setzero_ps().as_f32x8(), k))
4741}
4742
4743/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
4744///
4745/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rcp14_ps&expand=4496)
4746#[inline]
4747#[target_feature(enable = "avx512f,avx512vl")]
4748#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4749#[cfg_attr(test, assert_instr(vrcp14ps))]
4750pub unsafe fn _mm_rcp14_ps(a: __m128) -> __m128 {
    transmute(vrcp14ps128(
        a.as_f32x4(),
        _mm_setzero_ps().as_f32x4(),
        0b00001111,
    ))
4756}
4757
4758/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4759///
4760/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rcp14_ps&expand=4494)
4761#[inline]
4762#[target_feature(enable = "avx512f,avx512vl")]
4763#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4764#[cfg_attr(test, assert_instr(vrcp14ps))]
4765pub unsafe fn _mm_mask_rcp14_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    transmute(vrcp14ps128(a.as_f32x4(), src.as_f32x4(), k))
4767}
4768
4769/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4770///
4771/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rcp14_ps&expand=4495)
4772#[inline]
4773#[target_feature(enable = "avx512f,avx512vl")]
4774#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4775#[cfg_attr(test, assert_instr(vrcp14ps))]
4776pub unsafe fn _mm_maskz_rcp14_ps(k: __mmask8, a: __m128) -> __m128 {
    transmute(vrcp14ps128(a.as_f32x4(), _mm_setzero_ps().as_f32x4(), k))
4778}
4779
4780/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
4781///
4782/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rcp14_pd&expand=4493)
4783#[inline]
4784#[target_feature(enable = "avx512f")]
4785#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4786#[cfg_attr(test, assert_instr(vrcp14pd))]
4787pub unsafe fn _mm512_rcp14_pd(a: __m512d) -> __m512d {
    transmute(vrcp14pd(
        a.as_f64x8(),
        _mm512_setzero_pd().as_f64x8(),
        0b11111111,
    ))
4793}
4794
4795/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4796///
4797/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rcp14_pd&expand=4491)
4798#[inline]
4799#[target_feature(enable = "avx512f")]
4800#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4801#[cfg_attr(test, assert_instr(vrcp14pd))]
4802pub unsafe fn _mm512_mask_rcp14_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    transmute(vrcp14pd(a.as_f64x8(), src.as_f64x8(), k))
4804}
4805
4806/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4807///
4808/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rcp14_pd&expand=4492)
4809#[inline]
4810#[target_feature(enable = "avx512f")]
4811#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4812#[cfg_attr(test, assert_instr(vrcp14pd))]
4813pub unsafe fn _mm512_maskz_rcp14_pd(k: __mmask8, a: __m512d) -> __m512d {
    transmute(vrcp14pd(a.as_f64x8(), _mm512_setzero_pd().as_f64x8(), k))
4815}
4816
4817/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
4818///
4819/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rcp14_pd&expand=4490)
4820#[inline]
4821#[target_feature(enable = "avx512f,avx512vl")]
4822#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4823#[cfg_attr(test, assert_instr(vrcp14pd))]
4824pub unsafe fn _mm256_rcp14_pd(a: __m256d) -> __m256d {
    transmute(vrcp14pd256(
        a.as_f64x4(),
        _mm256_setzero_pd().as_f64x4(),
        0b00001111,
    ))
4830}
4831
4832/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4833///
4834/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rcp14_pd&expand=4488)
4835#[inline]
4836#[target_feature(enable = "avx512f,avx512vl")]
4837#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4838#[cfg_attr(test, assert_instr(vrcp14pd))]
4839pub unsafe fn _mm256_mask_rcp14_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
    transmute(vrcp14pd256(a.as_f64x4(), src.as_f64x4(), k))
4841}
4842
4843/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4844///
4845/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rcp14_pd&expand=4489)
4846#[inline]
4847#[target_feature(enable = "avx512f,avx512vl")]
4848#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4849#[cfg_attr(test, assert_instr(vrcp14pd))]
4850pub unsafe fn _mm256_maskz_rcp14_pd(k: __mmask8, a: __m256d) -> __m256d {
    transmute(vrcp14pd256(a.as_f64x4(), _mm256_setzero_pd().as_f64x4(), k))
4852}
4853
4854/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
4855///
4856/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rcp14_pd&expand=4487)
4857#[inline]
4858#[target_feature(enable = "avx512f,avx512vl")]
4859#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4860#[cfg_attr(test, assert_instr(vrcp14pd))]
4861pub unsafe fn _mm_rcp14_pd(a: __m128d) -> __m128d {
    transmute(vrcp14pd128(
        a.as_f64x2(),
        _mm_setzero_pd().as_f64x2(),
        0b00000011,
    ))
4867}
4868
4869/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4870///
4871/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rcp14_pd&expand=4485)
4872#[inline]
4873#[target_feature(enable = "avx512f,avx512vl")]
4874#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4875#[cfg_attr(test, assert_instr(vrcp14pd))]
4876pub unsafe fn _mm_mask_rcp14_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
    transmute(vrcp14pd128(a.as_f64x2(), src.as_f64x2(), k))
4878}
4879
4880/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4881///
4882/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rcp14_pd&expand=4486)
4883#[inline]
4884#[target_feature(enable = "avx512f,avx512vl")]
4885#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4886#[cfg_attr(test, assert_instr(vrcp14pd))]
4887pub unsafe fn _mm_maskz_rcp14_pd(k: __mmask8, a: __m128d) -> __m128d {
    transmute(vrcp14pd128(a.as_f64x2(), _mm_setzero_pd().as_f64x2(), k))
4889}
4890
4891/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
4892///
4893/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rsqrt14_ps&expand=4819)
4894#[inline]
4895#[target_feature(enable = "avx512f")]
4896#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4897#[cfg_attr(test, assert_instr(vrsqrt14ps))]
4898pub unsafe fn _mm512_rsqrt14_ps(a: __m512) -> __m512 {
    transmute(vrsqrt14ps(
        a.as_f32x16(),
        _mm512_setzero_ps().as_f32x16(),
        0b11111111_11111111,
    ))
4904}
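// Usage sketch (assumed values, not from the upstream source): `_mm512_rsqrt14_ps`
// approximates 1 / sqrt(x) with relative error below 2^-14; a common pattern is to
// multiply by `x` afterwards to get an approximate square root without a division.
//
//     let x = _mm512_set1_ps(9.0);
//     let inv_sqrt = _mm512_rsqrt14_ps(x);          // each lane ~= 1.0 / 3.0
//     let approx_sqrt = _mm512_mul_ps(x, inv_sqrt); // each lane ~= 3.0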
4905
4906/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4907///
4908/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rsqrt14_ps&expand=4817)
4909#[inline]
4910#[target_feature(enable = "avx512f")]
4911#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4912#[cfg_attr(test, assert_instr(vrsqrt14ps))]
4913pub unsafe fn _mm512_mask_rsqrt14_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    transmute(vrsqrt14ps(a.as_f32x16(), src.as_f32x16(), k))
4915}
4916
4917/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4918///
4919/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rsqrt14_ps&expand=4818)
4920#[inline]
4921#[target_feature(enable = "avx512f")]
4922#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4923#[cfg_attr(test, assert_instr(vrsqrt14ps))]
4924pub unsafe fn _mm512_maskz_rsqrt14_ps(k: __mmask16, a: __m512) -> __m512 {
    transmute(vrsqrt14ps(
        a.as_f32x16(),
        _mm512_setzero_ps().as_f32x16(),
        k,
    ))
4930}
4931
4932/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4933///
4934/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rsqrt14_ps&expand=4815)
4935#[inline]
4936#[target_feature(enable = "avx512f,avx512vl")]
4937#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4938#[cfg_attr(test, assert_instr(vrsqrt14ps))]
4939pub unsafe fn _mm256_mask_rsqrt14_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
    transmute(vrsqrt14ps256(a.as_f32x8(), src.as_f32x8(), k))
4941}
4942
4943/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4944///
4945/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rsqrt14_ps&expand=4816)
4946#[inline]
4947#[target_feature(enable = "avx512f,avx512vl")]
4948#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4949#[cfg_attr(test, assert_instr(vrsqrt14ps))]
4950pub unsafe fn _mm256_maskz_rsqrt14_ps(k: __mmask8, a: __m256) -> __m256 {
    transmute(vrsqrt14ps256(
        a.as_f32x8(),
        _mm256_setzero_ps().as_f32x8(),
        k,
    ))
4956}
4957
4958/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4959///
4960/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rsqrt14_ps&expand=4813)
4961#[inline]
4962#[target_feature(enable = "avx512f,avx512vl")]
4963#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4964#[cfg_attr(test, assert_instr(vrsqrt14ps))]
4965pub unsafe fn _mm_mask_rsqrt14_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    transmute(vrsqrt14ps128(a.as_f32x4(), src.as_f32x4(), k))
4967}
4968
4969/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4970///
4971/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rsqrt14_ps&expand=4814)
4972#[inline]
4973#[target_feature(enable = "avx512f,avx512vl")]
4974#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4975#[cfg_attr(test, assert_instr(vrsqrt14ps))]
4976pub unsafe fn _mm_maskz_rsqrt14_ps(k: __mmask8, a: __m128) -> __m128 {
    transmute(vrsqrt14ps128(a.as_f32x4(), _mm_setzero_ps().as_f32x4(), k))
4978}
4979
4980/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
4981///
4982/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rsqrt14_pd&expand=4812)
4983#[inline]
4984#[target_feature(enable = "avx512f")]
4985#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4986#[cfg_attr(test, assert_instr(vrsqrt14pd))]
4987pub unsafe fn _mm512_rsqrt14_pd(a: __m512d) -> __m512d {
    transmute(vrsqrt14pd(
        a.as_f64x8(),
        _mm512_setzero_pd().as_f64x8(),
        0b11111111,
    ))
4993}
4994
4995/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4996///
4997/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rsqrt14_pd&expand=4810)
4998#[inline]
4999#[target_feature(enable = "avx512f")]
5000#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5001#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5002pub unsafe fn _mm512_mask_rsqrt14_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    transmute(vrsqrt14pd(a.as_f64x8(), src.as_f64x8(), k))
5004}
5005
5006/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5007///
5008/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rsqrt14_pd&expand=4811)
5009#[inline]
5010#[target_feature(enable = "avx512f")]
5011#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5012#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5013pub unsafe fn _mm512_maskz_rsqrt14_pd(k: __mmask8, a: __m512d) -> __m512d {
    transmute(vrsqrt14pd(a.as_f64x8(), _mm512_setzero_pd().as_f64x8(), k))
5015}
5016
5017/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5018///
5019/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rsqrt14_pd&expand=4808)
5020#[inline]
5021#[target_feature(enable = "avx512f,avx512vl")]
5022#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5023#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5024pub unsafe fn _mm256_mask_rsqrt14_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
    transmute(vrsqrt14pd256(a.as_f64x4(), src.as_f64x4(), k))
5026}
5027
5028/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5029///
5030/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rsqrt14_pd&expand=4809)
5031#[inline]
5032#[target_feature(enable = "avx512f,avx512vl")]
5033#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5034#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5035pub unsafe fn _mm256_maskz_rsqrt14_pd(k: __mmask8, a: __m256d) -> __m256d {
    transmute(vrsqrt14pd256(
        a.as_f64x4(),
        _mm256_setzero_pd().as_f64x4(),
        k,
    ))
5041}
5042
5043/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5044///
5045/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rsqrt14_pd&expand=4806)
5046#[inline]
5047#[target_feature(enable = "avx512f,avx512vl")]
5048#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5049#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5050pub unsafe fn _mm_mask_rsqrt14_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
    transmute(vrsqrt14pd128(a.as_f64x2(), src.as_f64x2(), k))
5052}
5053
5054/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5055///
5056/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rsqrt14_pd&expand=4807)
5057#[inline]
5058#[target_feature(enable = "avx512f,avx512vl")]
5059#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5060#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5061pub unsafe fn _mm_maskz_rsqrt14_pd(k: __mmask8, a: __m128d) -> __m128d {
    transmute(vrsqrt14pd128(a.as_f64x2(), _mm_setzero_pd().as_f64x2(), k))
5063}
5064
5065/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5066///
5067/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_ps&expand=2844)
5068#[inline]
5069#[target_feature(enable = "avx512f")]
5070#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5071#[cfg_attr(test, assert_instr(vgetexpps))]
5072pub unsafe fn _mm512_getexp_ps(a: __m512) -> __m512 {
    transmute(vgetexpps(
        a.as_f32x16(),
        _mm512_setzero_ps().as_f32x16(),
        0b11111111_11111111,
        _MM_FROUND_CUR_DIRECTION,
    ))
5079}
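// Worked example (sketch; the input values are assumptions for illustration):
// `_mm512_getexp_ps` yields floor(log2(|x|)) as a float, i.e. the unbiased
// exponent of each lane.
//
//     let x = _mm512_set_ps(
//         0.5, 1.0, 2.0, 3.0, 4.0, 10.0, 100.0, 1024.0,
//         0.5, 1.0, 2.0, 3.0, 4.0, 10.0, 100.0, 1024.0,
//     );
//     // getexp of those values -> -1.0, 0.0, 1.0, 1.0, 2.0, 3.0, 6.0, 10.0 (repeated)
//     let e = _mm512_getexp_ps(x);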
5080
5081/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5082///
5083/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_ps&expand=2845)
5084#[inline]
5085#[target_feature(enable = "avx512f")]
5086#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5087#[cfg_attr(test, assert_instr(vgetexpps))]
5088pub unsafe fn _mm512_mask_getexp_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    transmute(vgetexpps(
        a.as_f32x16(),
        src.as_f32x16(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
5095}
5096
5097/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5098///
5099/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_ps&expand=2846)
5100#[inline]
5101#[target_feature(enable = "avx512f")]
5102#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5103#[cfg_attr(test, assert_instr(vgetexpps))]
5104pub unsafe fn _mm512_maskz_getexp_ps(k: __mmask16, a: __m512) -> __m512 {
    transmute(vgetexpps(
        a.as_f32x16(),
        _mm512_setzero_ps().as_f32x16(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
5111}
5112
5113/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5114///
5115/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getexp_ps&expand=2841)
5116#[inline]
5117#[target_feature(enable = "avx512f,avx512vl")]
5118#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5119#[cfg_attr(test, assert_instr(vgetexpps))]
5120pub unsafe fn _mm256_getexp_ps(a: __m256) -> __m256 {
    transmute(vgetexpps256(
        a.as_f32x8(),
        _mm256_setzero_ps().as_f32x8(),
        0b11111111,
    ))
5126}
5127
5128/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5129///
5130/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getexp_ps&expand=2842)
5131#[inline]
5132#[target_feature(enable = "avx512f,avx512vl")]
5133#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5134#[cfg_attr(test, assert_instr(vgetexpps))]
5135pub unsafe fn _mm256_mask_getexp_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
    transmute(vgetexpps256(a.as_f32x8(), src.as_f32x8(), k))
5137}
5138
5139/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5140///
5141/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getexp_ps&expand=2843)
5142#[inline]
5143#[target_feature(enable = "avx512f,avx512vl")]
5144#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5145#[cfg_attr(test, assert_instr(vgetexpps))]
5146pub unsafe fn _mm256_maskz_getexp_ps(k: __mmask8, a: __m256) -> __m256 {
    transmute(vgetexpps256(
        a.as_f32x8(),
        _mm256_setzero_ps().as_f32x8(),
        k,
    ))
5152}
5153
5154/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5155///
5156/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getexp_ps&expand=2838)
5157#[inline]
5158#[target_feature(enable = "avx512f,avx512vl")]
5159#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5160#[cfg_attr(test, assert_instr(vgetexpps))]
5161pub unsafe fn _mm_getexp_ps(a: __m128) -> __m128 {
    transmute(vgetexpps128(
        a.as_f32x4(),
        _mm_setzero_ps().as_f32x4(),
        0b00001111,
    ))
5167}
5168
5169/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5170///
5171/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getexp_ps&expand=2839)
5172#[inline]
5173#[target_feature(enable = "avx512f,avx512vl")]
5174#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5175#[cfg_attr(test, assert_instr(vgetexpps))]
5176pub unsafe fn _mm_mask_getexp_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    transmute(vgetexpps128(a.as_f32x4(), src.as_f32x4(), k))
5178}
5179
5180/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5181///
5182/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getexp_ps&expand=2840)
5183#[inline]
5184#[target_feature(enable = "avx512f,avx512vl")]
5185#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5186#[cfg_attr(test, assert_instr(vgetexpps))]
5187pub unsafe fn _mm_maskz_getexp_ps(k: __mmask8, a: __m128) -> __m128 {
    transmute(vgetexpps128(a.as_f32x4(), _mm_setzero_ps().as_f32x4(), k))
5189}
5190
5191/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5192///
5193/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_pd&expand=2835)
5194#[inline]
5195#[target_feature(enable = "avx512f")]
5196#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5197#[cfg_attr(test, assert_instr(vgetexppd))]
5198pub unsafe fn _mm512_getexp_pd(a: __m512d) -> __m512d {
    transmute(vgetexppd(
        a.as_f64x8(),
        _mm512_setzero_pd().as_f64x8(),
        0b11111111,
        _MM_FROUND_CUR_DIRECTION,
    ))
5205}
5206
5207/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5208///
5209/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_pd&expand=2836)
5210#[inline]
5211#[target_feature(enable = "avx512f")]
5212#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5213#[cfg_attr(test, assert_instr(vgetexppd))]
5214pub unsafe fn _mm512_mask_getexp_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    transmute(vgetexppd(
        a.as_f64x8(),
        src.as_f64x8(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
5221}
5222
5223/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5224///
5225/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_pd&expand=2837)
5226#[inline]
5227#[target_feature(enable = "avx512f")]
5228#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5229#[cfg_attr(test, assert_instr(vgetexppd))]
5230pub unsafe fn _mm512_maskz_getexp_pd(k: __mmask8, a: __m512d) -> __m512d {
    transmute(vgetexppd(
        a.as_f64x8(),
        _mm512_setzero_pd().as_f64x8(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
5237}
5238
5239/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5240///
5241/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getexp_pd&expand=2832)
5242#[inline]
5243#[target_feature(enable = "avx512f,avx512vl")]
5244#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5245#[cfg_attr(test, assert_instr(vgetexppd))]
5246pub unsafe fn _mm256_getexp_pd(a: __m256d) -> __m256d {
    transmute(vgetexppd256(
        a.as_f64x4(),
        _mm256_setzero_pd().as_f64x4(),
        0b00001111,
    ))
5252}
5253
5254/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5255///
5256/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getexp_pd&expand=2833)
5257#[inline]
5258#[target_feature(enable = "avx512f,avx512vl")]
5259#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5260#[cfg_attr(test, assert_instr(vgetexppd))]
5261pub unsafe fn _mm256_mask_getexp_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
    transmute(vgetexppd256(a.as_f64x4(), src.as_f64x4(), k))
5263}
5264
5265/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5266///
5267/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getexp_pd&expand=2834)
5268#[inline]
5269#[target_feature(enable = "avx512f,avx512vl")]
5270#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5271#[cfg_attr(test, assert_instr(vgetexppd))]
5272pub unsafe fn _mm256_maskz_getexp_pd(k: __mmask8, a: __m256d) -> __m256d {
    transmute(vgetexppd256(
        a.as_f64x4(),
        _mm256_setzero_pd().as_f64x4(),
        k,
    ))
5278}
5279
5280/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5281///
5282/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getexp_pd&expand=2829)
5283#[inline]
5284#[target_feature(enable = "avx512f,avx512vl")]
5285#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5286#[cfg_attr(test, assert_instr(vgetexppd))]
5287pub unsafe fn _mm_getexp_pd(a: __m128d) -> __m128d {
    transmute(vgetexppd128(
        a.as_f64x2(),
        _mm_setzero_pd().as_f64x2(),
        0b00000011,
    ))
5293}
5294
5295/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5296///
5297/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getexp_pd&expand=2830)
5298#[inline]
5299#[target_feature(enable = "avx512f,avx512vl")]
5300#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5301#[cfg_attr(test, assert_instr(vgetexppd))]
5302pub unsafe fn _mm_mask_getexp_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
    transmute(vgetexppd128(a.as_f64x2(), src.as_f64x2(), k))
5304}
5305
5306/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5307///
5308/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getexp_pd&expand=2831)
5309#[inline]
5310#[target_feature(enable = "avx512f,avx512vl")]
5311#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5312#[cfg_attr(test, assert_instr(vgetexppd))]
5313pub unsafe fn _mm_maskz_getexp_pd(k: __mmask8, a: __m128d) -> __m128d {
    transmute(vgetexppd128(a.as_f64x2(), _mm_setzero_pd().as_f64x2(), k))
5315}
5316
5317/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5318/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5319/// _MM_FROUND_TO_NEAREST_INT // round to nearest\
5320/// _MM_FROUND_TO_NEG_INF // round down\
5321/// _MM_FROUND_TO_POS_INF // round up\
5322/// _MM_FROUND_TO_ZERO // truncate\
5323/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
5324///
5325/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_ps&expand=4784)
5326#[inline]
5327#[target_feature(enable = "avx512f")]
5328#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5329#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5330#[rustc_legacy_const_generics(1)]
5331pub unsafe fn _mm512_roundscale_ps<const IMM8: i32>(a: __m512) -> __m512 {
5332 static_assert_uimm_bits!(IMM8, 8);
5333 let a: f32x16 = a.as_f32x16();
5334 let zero: f32x16 = _mm512_setzero_ps().as_f32x16();
    let r: f32x16 = vrndscaleps(a, IMM8, zero, 0b11111111_11111111, _MM_FROUND_CUR_DIRECTION);
    transmute(r)
5337}
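// Usage sketch (values are assumptions for the example): with IMM8 = 0 the scale is
// 2^0, so `_mm512_roundscale_ps::<0>` rounds each lane to the nearest integer; the
// low three bits of IMM8 pick the rounding mode listed above.
//
//     let x = _mm512_set1_ps(1.75);
//     let nearest = _mm512_roundscale_ps::<0>(x); // each lane == 2.0
//     // Selecting _MM_FROUND_TO_ZERO with zero fraction bits truncates instead:
//     let trunc = _mm512_roundscale_ps::<{ _MM_FROUND_TO_ZERO }>(x); // each lane == 1.0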
5338
5339/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5340/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5341/// _MM_FROUND_TO_NEAREST_INT // round to nearest\
5342/// _MM_FROUND_TO_NEG_INF // round down\
5343/// _MM_FROUND_TO_POS_INF // round up\
5344/// _MM_FROUND_TO_ZERO // truncate\
5345/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
5346///
5347/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_ps&expand=4782)
5348#[inline]
5349#[target_feature(enable = "avx512f")]
5350#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5351#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5352#[rustc_legacy_const_generics(3)]
5353pub unsafe fn _mm512_mask_roundscale_ps<const IMM8: i32>(
5354 src: __m512,
5355 k: __mmask16,
5356 a: __m512,
5357) -> __m512 {
5358 static_assert_uimm_bits!(IMM8, 8);
5359 let a: f32x16 = a.as_f32x16();
5360 let src: f32x16 = src.as_f32x16();
    let r: f32x16 = vrndscaleps(a, IMM8, src, k, _MM_FROUND_CUR_DIRECTION);
    transmute(r)
5363}
5364
5365/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5366/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5367/// _MM_FROUND_TO_NEAREST_INT // round to nearest\
5368/// _MM_FROUND_TO_NEG_INF // round down\
5369/// _MM_FROUND_TO_POS_INF // round up\
5370/// _MM_FROUND_TO_ZERO // truncate\
5371/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
5372///
5373/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_ps&expand=4783)
5374#[inline]
5375#[target_feature(enable = "avx512f")]
5376#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5377#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5378#[rustc_legacy_const_generics(2)]
5379pub unsafe fn _mm512_maskz_roundscale_ps<const IMM8: i32>(k: __mmask16, a: __m512) -> __m512 {
5380 static_assert_uimm_bits!(IMM8, 8);
5381 let a: f32x16 = a.as_f32x16();
5382 let zero: f32x16 = _mm512_setzero_ps().as_f32x16();
    let r: f32x16 = vrndscaleps(a, IMM8, zero, k, _MM_FROUND_CUR_DIRECTION);
    transmute(r)
5385}
5386
5387/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5388/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5389/// _MM_FROUND_TO_NEAREST_INT // round to nearest\
5390/// _MM_FROUND_TO_NEG_INF // round down\
5391/// _MM_FROUND_TO_POS_INF // round up\
5392/// _MM_FROUND_TO_ZERO // truncate\
5393/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
5394///
5395/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_roundscale_ps&expand=4781)
5396#[inline]
5397#[target_feature(enable = "avx512f,avx512vl")]
5398#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5399#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 250))]
5400#[rustc_legacy_const_generics(1)]
5401pub unsafe fn _mm256_roundscale_ps<const IMM8: i32>(a: __m256) -> __m256 {
5402 static_assert_uimm_bits!(IMM8, 8);
5403 let a: f32x8 = a.as_f32x8();
5404 let zero: f32x8 = _mm256_setzero_ps().as_f32x8();
    let r: f32x8 = vrndscaleps256(a, IMM8, zero, 0b11111111);
    transmute(r)
5407}
5408
5409/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5410/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5411/// _MM_FROUND_TO_NEAREST_INT // round to nearest\
5412/// _MM_FROUND_TO_NEG_INF // round down\
5413/// _MM_FROUND_TO_POS_INF // round up\
5414/// _MM_FROUND_TO_ZERO // truncate\
5415/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
5416///
5417/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_roundscale_ps&expand=4779)
5418#[inline]
5419#[target_feature(enable = "avx512f,avx512vl")]
5420#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5421#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5422#[rustc_legacy_const_generics(3)]
5423pub unsafe fn _mm256_mask_roundscale_ps<const IMM8: i32>(
5424 src: __m256,
5425 k: __mmask8,
5426 a: __m256,
5427) -> __m256 {
5428 static_assert_uimm_bits!(IMM8, 8);
5429 let a: f32x8 = a.as_f32x8();
5430 let src: f32x8 = src.as_f32x8();
    let r: f32x8 = vrndscaleps256(a, IMM8, src, k);
    transmute(r)
5433}
5434
5435/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5436/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5437/// _MM_FROUND_TO_NEAREST_INT // round to nearest\
5438/// _MM_FROUND_TO_NEG_INF // round down\
5439/// _MM_FROUND_TO_POS_INF // round up\
5440/// _MM_FROUND_TO_ZERO // truncate\
5441/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
5442///
5443/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_roundscale_ps&expand=4780)
5444#[inline]
5445#[target_feature(enable = "avx512f,avx512vl")]
5446#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5447#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5448#[rustc_legacy_const_generics(2)]
5449pub unsafe fn _mm256_maskz_roundscale_ps<const IMM8: i32>(k: __mmask8, a: __m256) -> __m256 {
5450 static_assert_uimm_bits!(IMM8, 8);
5451 let a: f32x8 = a.as_f32x8();
5452 let zero: f32x8 = _mm256_setzero_ps().as_f32x8();
    let r: f32x8 = vrndscaleps256(a, IMM8, zero, k);
    transmute(r)
5455}
5456
5457/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5458/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5459/// _MM_FROUND_TO_NEAREST_INT // round to nearest\
5460/// _MM_FROUND_TO_NEG_INF // round down\
5461/// _MM_FROUND_TO_POS_INF // round up\
5462/// _MM_FROUND_TO_ZERO // truncate\
5463/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
5464///
5465/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_roundscale_ps&expand=4778)
5466#[inline]
5467#[target_feature(enable = "avx512f,avx512vl")]
5468#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5469#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 250))]
5470#[rustc_legacy_const_generics(1)]
5471pub unsafe fn _mm_roundscale_ps<const IMM8: i32>(a: __m128) -> __m128 {
5472 static_assert_uimm_bits!(IMM8, 8);
5473 let a: f32x4 = a.as_f32x4();
5474 let zero: f32x4 = _mm_setzero_ps().as_f32x4();
    let r: f32x4 = vrndscaleps128(a, IMM8, zero, 0b00001111);
    transmute(r)
5477}
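
// Illustrative sketch, not part of the public API: the 128- and 256-bit forms
// additionally require avx512vl. The IMM8 value below is an assumption: zero
// fraction bits with truncation, i.e. trunc() applied to each lane.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn roundscale_ps128_sketch(a: __m128) -> __m128 {
    _mm_roundscale_ps::<{ _MM_FROUND_TO_ZERO }>(a)
}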
5478
5479/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5480/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5481/// _MM_FROUND_TO_NEAREST_INT // round to nearest\
5482/// _MM_FROUND_TO_NEG_INF // round down\
5483/// _MM_FROUND_TO_POS_INF // round up\
5484/// _MM_FROUND_TO_ZERO // truncate\
5485/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
5486///
5487/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_roundscale_ps&expand=4776)
5488#[inline]
5489#[target_feature(enable = "avx512f,avx512vl")]
5490#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5491#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5492#[rustc_legacy_const_generics(3)]
5493pub unsafe fn _mm_mask_roundscale_ps<const IMM8: i32>(
5494 src: __m128,
5495 k: __mmask8,
5496 a: __m128,
5497) -> __m128 {
5498 static_assert_uimm_bits!(IMM8, 8);
5499 let a: f32x4 = a.as_f32x4();
5500 let src: f32x4 = src.as_f32x4();
    let r: f32x4 = vrndscaleps128(a, IMM8, src, k);
    transmute(r)
5503}
5504
5505/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5506/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5507/// _MM_FROUND_TO_NEAREST_INT // round to nearest\
5508/// _MM_FROUND_TO_NEG_INF // round down\
5509/// _MM_FROUND_TO_POS_INF // round up\
5510/// _MM_FROUND_TO_ZERO // truncate\
5511/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
5512///
5513/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_roundscale_ps&expand=4777)
5514#[inline]
5515#[target_feature(enable = "avx512f,avx512vl")]
5516#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5517#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5518#[rustc_legacy_const_generics(2)]
5519pub unsafe fn _mm_maskz_roundscale_ps<const IMM8: i32>(k: __mmask8, a: __m128) -> __m128 {
5520 static_assert_uimm_bits!(IMM8, 8);
5521 let a: f32x4 = a.as_f32x4();
5522 let zero: f32x4 = _mm_setzero_ps().as_f32x4();
    let r: f32x4 = vrndscaleps128(a, IMM8, zero, k);
    transmute(r)
5525}
5526
5527/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5528/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5529/// _MM_FROUND_TO_NEAREST_INT // round to nearest\
5530/// _MM_FROUND_TO_NEG_INF // round down\
5531/// _MM_FROUND_TO_POS_INF // round up\
5532/// _MM_FROUND_TO_ZERO // truncate\
5533/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
5534///
5535/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_pd&expand=4775)
5536#[inline]
5537#[target_feature(enable = "avx512f")]
5538#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5539#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5540#[rustc_legacy_const_generics(1)]
5541pub unsafe fn _mm512_roundscale_pd<const IMM8: i32>(a: __m512d) -> __m512d {
5542 static_assert_uimm_bits!(IMM8, 8);
5543 let a: f64x8 = a.as_f64x8();
5544 let zero: f64x8 = _mm512_setzero_pd().as_f64x8();
    let r: f64x8 = vrndscalepd(a, IMM8, zero, 0b11111111, _MM_FROUND_CUR_DIRECTION);
    transmute(r)
5547}
5548
5549/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5550/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5551/// _MM_FROUND_TO_NEAREST_INT // round to nearest\
5552/// _MM_FROUND_TO_NEG_INF // round down\
5553/// _MM_FROUND_TO_POS_INF // round up\
5554/// _MM_FROUND_TO_ZERO // truncate\
5555/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
5556///
5557/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_pd&expand=4773)
5558#[inline]
5559#[target_feature(enable = "avx512f")]
5560#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5561#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5562#[rustc_legacy_const_generics(3)]
5563pub unsafe fn _mm512_mask_roundscale_pd<const IMM8: i32>(
5564 src: __m512d,
5565 k: __mmask8,
5566 a: __m512d,
5567) -> __m512d {
5568 static_assert_uimm_bits!(IMM8, 8);
5569 let a: f64x8 = a.as_f64x8();
5570 let src: f64x8 = src.as_f64x8();
    let r: f64x8 = vrndscalepd(a, IMM8, src, k, _MM_FROUND_CUR_DIRECTION);
    transmute(r)
5573}
5574
5575/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5576/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5577/// _MM_FROUND_TO_NEAREST_INT // round to nearest\
5578/// _MM_FROUND_TO_NEG_INF // round down\
5579/// _MM_FROUND_TO_POS_INF // round up\
5580/// _MM_FROUND_TO_ZERO // truncate\
5581/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
5582///
5583/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_pd&expand=4774)
5584#[inline]
5585#[target_feature(enable = "avx512f")]
5586#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5587#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5588#[rustc_legacy_const_generics(2)]
5589pub unsafe fn _mm512_maskz_roundscale_pd<const IMM8: i32>(k: __mmask8, a: __m512d) -> __m512d {
5590 static_assert_uimm_bits!(IMM8, 8);
5591 let a: f64x8 = a.as_f64x8();
5592 let zero: f64x8 = _mm512_setzero_pd().as_f64x8();
    let r: f64x8 = vrndscalepd(a, IMM8, zero, k, _MM_FROUND_CUR_DIRECTION);
    transmute(r)
5595}
5596
5597/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5598/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5599/// _MM_FROUND_TO_NEAREST_INT // round to nearest\
5600/// _MM_FROUND_TO_NEG_INF // round down\
5601/// _MM_FROUND_TO_POS_INF // round up\
5602/// _MM_FROUND_TO_ZERO // truncate\
5603/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
5604///
5605/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_roundscale_pd&expand=4772)
5606#[inline]
5607#[target_feature(enable = "avx512f,avx512vl")]
5608#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5609#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5610#[rustc_legacy_const_generics(1)]
5611pub unsafe fn _mm256_roundscale_pd<const IMM8: i32>(a: __m256d) -> __m256d {
5612 static_assert_uimm_bits!(IMM8, 8);
5613 let a: f64x4 = a.as_f64x4();
5614 let zero: f64x4 = _mm256_setzero_pd().as_f64x4();
    let r: f64x4 = vrndscalepd256(a, IMM8, zero, 0b00001111);
    transmute(r)
5617}
5618
5619/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5620/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5621/// _MM_FROUND_TO_NEAREST_INT // round to nearest\
5622/// _MM_FROUND_TO_NEG_INF // round down\
5623/// _MM_FROUND_TO_POS_INF // round up\
5624/// _MM_FROUND_TO_ZERO // truncate\
5625/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
5626///
5627/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_roundscale_pd&expand=4770)
5628#[inline]
5629#[target_feature(enable = "avx512f,avx512vl")]
5630#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5631#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5632#[rustc_legacy_const_generics(3)]
5633pub unsafe fn _mm256_mask_roundscale_pd<const IMM8: i32>(
5634 src: __m256d,
5635 k: __mmask8,
5636 a: __m256d,
5637) -> __m256d {
5638 static_assert_uimm_bits!(IMM8, 8);
5639 let a: f64x4 = a.as_f64x4();
5640 let src: f64x4 = src.as_f64x4();
    let r: f64x4 = vrndscalepd256(a, IMM8, src, k);
    transmute(r)
5643}
5644
5645/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5646/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5647/// _MM_FROUND_TO_NEAREST_INT // round to nearest\
5648/// _MM_FROUND_TO_NEG_INF // round down\
5649/// _MM_FROUND_TO_POS_INF // round up\
5650/// _MM_FROUND_TO_ZERO // truncate\
5651/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
5652///
5653/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_roundscale_pd&expand=4771)
5654#[inline]
5655#[target_feature(enable = "avx512f,avx512vl")]
5656#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5657#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5658#[rustc_legacy_const_generics(2)]
5659pub unsafe fn _mm256_maskz_roundscale_pd<const IMM8: i32>(k: __mmask8, a: __m256d) -> __m256d {
5660 static_assert_uimm_bits!(IMM8, 8);
5661 let a: f64x4 = a.as_f64x4();
5662 let zero: f64x4 = _mm256_setzero_pd().as_f64x4();
    let r: f64x4 = vrndscalepd256(a, IMM8, zero, k);
    transmute(r)
5665}
5666
5667/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5668/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5669/// _MM_FROUND_TO_NEAREST_INT // round to nearest\
5670/// _MM_FROUND_TO_NEG_INF // round down\
5671/// _MM_FROUND_TO_POS_INF // round up\
5672/// _MM_FROUND_TO_ZERO // truncate\
5673/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
5674///
5675/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_roundscale_pd&expand=4769)
5676#[inline]
5677#[target_feature(enable = "avx512f,avx512vl")]
5678#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5679#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5680#[rustc_legacy_const_generics(1)]
5681pub unsafe fn _mm_roundscale_pd<const IMM8: i32>(a: __m128d) -> __m128d {
5682 static_assert_uimm_bits!(IMM8, 8);
5683 let a: f64x2 = a.as_f64x2();
5684 let zero: f64x2 = _mm_setzero_pd().as_f64x2();
    let r: f64x2 = vrndscalepd128(a, IMM8, zero, 0b00000011);
    transmute(r)
5687}
5688
5689/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5690/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5691/// _MM_FROUND_TO_NEAREST_INT // round to nearest\
5692/// _MM_FROUND_TO_NEG_INF // round down\
5693/// _MM_FROUND_TO_POS_INF // round up\
5694/// _MM_FROUND_TO_ZERO // truncate\
5695/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
5696///
5697/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_roundscale_pd&expand=4767)
5698#[inline]
5699#[target_feature(enable = "avx512f,avx512vl")]
5700#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5701#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5702#[rustc_legacy_const_generics(3)]
5703pub unsafe fn _mm_mask_roundscale_pd<const IMM8: i32>(
5704 src: __m128d,
5705 k: __mmask8,
5706 a: __m128d,
5707) -> __m128d {
5708 static_assert_uimm_bits!(IMM8, 8);
5709 let a: f64x2 = a.as_f64x2();
5710 let src: f64x2 = src.as_f64x2();
    let r: f64x2 = vrndscalepd128(a, IMM8, src, k);
    transmute(r)
5713}
5714
5715/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5716/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5717/// _MM_FROUND_TO_NEAREST_INT // round to nearest\
5718/// _MM_FROUND_TO_NEG_INF // round down\
5719/// _MM_FROUND_TO_POS_INF // round up\
5720/// _MM_FROUND_TO_ZERO // truncate\
5721/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
5722///
5723/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_roundscale_pd&expand=4768)
5724#[inline]
5725#[target_feature(enable = "avx512f,avx512vl")]
5726#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5727#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5728#[rustc_legacy_const_generics(2)]
5729pub unsafe fn _mm_maskz_roundscale_pd<const IMM8: i32>(k: __mmask8, a: __m128d) -> __m128d {
5730 static_assert_uimm_bits!(IMM8, 8);
5731 let a: f64x2 = a.as_f64x2();
5732 let zero: f64x2 = _mm_setzero_pd().as_f64x2();
    let r: f64x2 = vrndscalepd128(a, IMM8, zero, k);
    transmute(r)
5735}
5736
5737/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.
5738///
5739/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_ps&expand=4883)
5740#[inline]
5741#[target_feature(enable = "avx512f")]
5742#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5743#[cfg_attr(test, assert_instr(vscalefps))]
5744pub unsafe fn _mm512_scalef_ps(a: __m512, b: __m512) -> __m512 {
    transmute(vscalefps(
        a.as_f32x16(),
        b.as_f32x16(),
        _mm512_setzero_ps().as_f32x16(),
        0b11111111_11111111,
        _MM_FROUND_CUR_DIRECTION,
    ))
5752}
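
// Illustrative sketch, not part of the public API: each lane computes
// a * 2^floor(b), which makes scalef handy for ldexp-style exponent scaling.
// The inputs below are arbitrary assumptions for the example.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn scalef_ps_sketch() -> __m512 {
    // 3.0 * 2^floor(2.5) = 3.0 * 4.0 = 12.0 in every lane.
    _mm512_scalef_ps(_mm512_set1_ps(3.0), _mm512_set1_ps(2.5))
}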
5753
5754/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5755///
5756/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_ps&expand=4881)
5757#[inline]
5758#[target_feature(enable = "avx512f")]
5759#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5760#[cfg_attr(test, assert_instr(vscalefps))]
5761pub unsafe fn _mm512_mask_scalef_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    transmute(vscalefps(
        a.as_f32x16(),
        b.as_f32x16(),
        src.as_f32x16(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
5769}
5770
5771/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5772///
5773/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_ps&expand=4882)
5774#[inline]
5775#[target_feature(enable = "avx512f")]
5776#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5777#[cfg_attr(test, assert_instr(vscalefps))]
5778pub unsafe fn _mm512_maskz_scalef_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    transmute(vscalefps(
        a.as_f32x16(),
        b.as_f32x16(),
        _mm512_setzero_ps().as_f32x16(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
5786}
5787
5788/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.
5789///
5790/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_scalef_ps&expand=4880)
5791#[inline]
5792#[target_feature(enable = "avx512f,avx512vl")]
5793#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5794#[cfg_attr(test, assert_instr(vscalefps))]
5795pub unsafe fn _mm256_scalef_ps(a: __m256, b: __m256) -> __m256 {
    transmute(vscalefps256(
        a.as_f32x8(),
        b.as_f32x8(),
        _mm256_setzero_ps().as_f32x8(),
        0b11111111,
    ))
5802}
5803
5804/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5805///
5806/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_scalef_ps&expand=4878)
5807#[inline]
5808#[target_feature(enable = "avx512f,avx512vl")]
5809#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5810#[cfg_attr(test, assert_instr(vscalefps))]
5811pub unsafe fn _mm256_mask_scalef_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
    transmute(vscalefps256(a.as_f32x8(), b.as_f32x8(), src.as_f32x8(), k))
5813}
5814
5815/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5816///
5817/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_scalef_ps&expand=4879)
5818#[inline]
5819#[target_feature(enable = "avx512f,avx512vl")]
5820#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5821#[cfg_attr(test, assert_instr(vscalefps))]
5822pub unsafe fn _mm256_maskz_scalef_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    transmute(vscalefps256(
        a.as_f32x8(),
        b.as_f32x8(),
        _mm256_setzero_ps().as_f32x8(),
        k,
    ))
5829}
5830
5831/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.
5832///
5833/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_scalef_ps&expand=4877)
5834#[inline]
5835#[target_feature(enable = "avx512f,avx512vl")]
5836#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5837#[cfg_attr(test, assert_instr(vscalefps))]
5838pub unsafe fn _mm_scalef_ps(a: __m128, b: __m128) -> __m128 {
    transmute(vscalefps128(
        a.as_f32x4(),
        b.as_f32x4(),
        _mm_setzero_ps().as_f32x4(),
        0b00001111,
    ))
5845}
5846
5847/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5848///
5849/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_scalef_ps&expand=4875)
5850#[inline]
5851#[target_feature(enable = "avx512f,avx512vl")]
5852#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5853#[cfg_attr(test, assert_instr(vscalefps))]
5854pub unsafe fn _mm_mask_scalef_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    transmute(vscalefps128(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k))
5856}
5857
5858/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5859///
5860/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_scalef_ps&expand=4876)
5861#[inline]
5862#[target_feature(enable = "avx512f,avx512vl")]
5863#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5864#[cfg_attr(test, assert_instr(vscalefps))]
5865pub unsafe fn _mm_maskz_scalef_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    transmute(vscalefps128(
        a.as_f32x4(),
        b.as_f32x4(),
        _mm_setzero_ps().as_f32x4(),
        k,
    ))
5872}
5873
5874/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.
5875///
5876/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_pd&expand=4874)
5877#[inline]
5878#[target_feature(enable = "avx512f")]
5879#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5880#[cfg_attr(test, assert_instr(vscalefpd))]
5881pub unsafe fn _mm512_scalef_pd(a: __m512d, b: __m512d) -> __m512d {
    transmute(vscalefpd(
        a.as_f64x8(),
        b.as_f64x8(),
        _mm512_setzero_pd().as_f64x8(),
        0b11111111,
        _MM_FROUND_CUR_DIRECTION,
    ))
5889}
5890
5891/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5892///
5893/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_pd&expand=4872)
5894#[inline]
5895#[target_feature(enable = "avx512f")]
5896#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5897#[cfg_attr(test, assert_instr(vscalefpd))]
5898pub unsafe fn _mm512_mask_scalef_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    transmute(vscalefpd(
        a.as_f64x8(),
        b.as_f64x8(),
        src.as_f64x8(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
5906}
5907
5908/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5909///
5910/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_pd&expand=4873)
5911#[inline]
5912#[target_feature(enable = "avx512f")]
5913#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5914#[cfg_attr(test, assert_instr(vscalefpd))]
5915pub unsafe fn _mm512_maskz_scalef_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    transmute(vscalefpd(
        a.as_f64x8(),
        b.as_f64x8(),
        _mm512_setzero_pd().as_f64x8(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
5923}
5924
5925/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.
5926///
5927/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_scalef_pd&expand=4871)
5928#[inline]
5929#[target_feature(enable = "avx512f,avx512vl")]
5930#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5931#[cfg_attr(test, assert_instr(vscalefpd))]
5932pub unsafe fn _mm256_scalef_pd(a: __m256d, b: __m256d) -> __m256d {
    transmute(vscalefpd256(
        a.as_f64x4(),
        b.as_f64x4(),
        _mm256_setzero_pd().as_f64x4(),
        0b00001111,
    ))
5939}
5940
5941/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5942///
5943/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_scalef_pd&expand=4869)
5944#[inline]
5945#[target_feature(enable = "avx512f,avx512vl")]
5946#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5947#[cfg_attr(test, assert_instr(vscalefpd))]
5948pub unsafe fn _mm256_mask_scalef_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    transmute(vscalefpd256(a.as_f64x4(), b.as_f64x4(), src.as_f64x4(), k))
5950}
5951
5952/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5953///
5954/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_scalef_pd&expand=4870)
5955#[inline]
5956#[target_feature(enable = "avx512f,avx512vl")]
5957#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5958#[cfg_attr(test, assert_instr(vscalefpd))]
5959pub unsafe fn _mm256_maskz_scalef_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    transmute(vscalefpd256(
        a.as_f64x4(),
        b.as_f64x4(),
        _mm256_setzero_pd().as_f64x4(),
        k,
    ))
5966}
5967
5968/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.
5969///
5970/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_scalef_pd&expand=4868)
5971#[inline]
5972#[target_feature(enable = "avx512f,avx512vl")]
5973#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5974#[cfg_attr(test, assert_instr(vscalefpd))]
5975pub unsafe fn _mm_scalef_pd(a: __m128d, b: __m128d) -> __m128d {
    transmute(vscalefpd128(
        a.as_f64x2(),
        b.as_f64x2(),
        _mm_setzero_pd().as_f64x2(),
        0b00000011,
    ))
5982}
5983
5984/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5985///
5986/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_scalef_pd&expand=4866)
5987#[inline]
5988#[target_feature(enable = "avx512f,avx512vl")]
5989#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5990#[cfg_attr(test, assert_instr(vscalefpd))]
5991pub unsafe fn _mm_mask_scalef_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    transmute(vscalefpd128(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k))
5993}
5994
5995/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5996///
5997/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_scalef_pd&expand=4867)
5998#[inline]
5999#[target_feature(enable = "avx512f,avx512vl")]
6000#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6001#[cfg_attr(test, assert_instr(vscalefpd))]
6002pub unsafe fn _mm_maskz_scalef_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    transmute(vscalefpd128(
        a.as_f64x2(),
        b.as_f64x2(),
        _mm_setzero_pd().as_f64x2(),
        k,
    ))
6009}
6010
6011/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6012///
6013/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_ps&expand=2499)
6014#[inline]
6015#[target_feature(enable = "avx512f")]
6016#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6017#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6018#[rustc_legacy_const_generics(3)]
6019pub unsafe fn _mm512_fixupimm_ps<const IMM8: i32>(a: __m512, b: __m512, c: __m512i) -> __m512 {
6020 static_assert_uimm_bits!(IMM8, 8);
6021 let a: f32x16 = a.as_f32x16();
6022 let b: f32x16 = b.as_f32x16();
6023 let c: i32x16 = c.as_i32x16();
    let r: f32x16 = vfixupimmps(a, b, c, IMM8, 0b11111111_11111111, _MM_FROUND_CUR_DIRECTION);
    transmute(r)
6026}
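
// Illustrative sketch, not part of the public API: every 32-bit lane of `c` is a
// table of eight 4-bit response codes indexed by how the matching lane of `b`
// classifies (NaN, zero, one, +/-infinity, negative, positive), and imm8 selects
// which classes also raise an exception. The all-zero table used here is assumed
// to mean "keep the lane of `a` unchanged" for every class; consult Intel's SDM
// for the full response-code encoding before relying on any other code.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn fixupimm_ps_sketch(a: __m512, b: __m512) -> __m512 {
    let table = _mm512_setzero_si512();
    _mm512_fixupimm_ps::<0>(a, b, table)
}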
6027
6028/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6029///
6030/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_ps&expand=2500)
6031#[inline]
6032#[target_feature(enable = "avx512f")]
6033#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6034#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6035#[rustc_legacy_const_generics(4)]
6036pub unsafe fn _mm512_mask_fixupimm_ps<const IMM8: i32>(
6037 a: __m512,
6038 k: __mmask16,
6039 b: __m512,
6040 c: __m512i,
6041) -> __m512 {
6042 static_assert_uimm_bits!(IMM8, 8);
6043 let a: f32x16 = a.as_f32x16();
6044 let b: f32x16 = b.as_f32x16();
6045 let c: i32x16 = c.as_i32x16();
    let r: f32x16 = vfixupimmps(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
    transmute(r)
6048}
6049
6050/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6051///
6052/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_ps&expand=2501)
6053#[inline]
6054#[target_feature(enable = "avx512f")]
6055#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6056#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6057#[rustc_legacy_const_generics(4)]
6058pub unsafe fn _mm512_maskz_fixupimm_ps<const IMM8: i32>(
6059 k: __mmask16,
6060 a: __m512,
6061 b: __m512,
6062 c: __m512i,
6063) -> __m512 {
6064 static_assert_uimm_bits!(IMM8, 8);
6065 let a: f32x16 = a.as_f32x16();
6066 let b: f32x16 = b.as_f32x16();
6067 let c: i32x16 = c.as_i32x16();
    let r: f32x16 = vfixupimmpsz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
    transmute(r)
6070}
6071
6072/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6073///
6074/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fixupimm_ps&expand=2496)
6075#[inline]
6076#[target_feature(enable = "avx512f,avx512vl")]
6077#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6078#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6079#[rustc_legacy_const_generics(3)]
6080pub unsafe fn _mm256_fixupimm_ps<const IMM8: i32>(a: __m256, b: __m256, c: __m256i) -> __m256 {
6081 static_assert_uimm_bits!(IMM8, 8);
6082 let a: f32x8 = a.as_f32x8();
6083 let b: f32x8 = b.as_f32x8();
6084 let c: i32x8 = c.as_i32x8();
    let r: f32x8 = vfixupimmps256(a, b, c, IMM8, 0b11111111);
    transmute(r)
6087}
6088
6089/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6090///
6091/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fixupimm_ps&expand=2497)
6092#[inline]
6093#[target_feature(enable = "avx512f,avx512vl")]
6094#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6095#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6096#[rustc_legacy_const_generics(4)]
6097pub unsafe fn _mm256_mask_fixupimm_ps<const IMM8: i32>(
6098 a: __m256,
6099 k: __mmask8,
6100 b: __m256,
6101 c: __m256i,
6102) -> __m256 {
6103 static_assert_uimm_bits!(IMM8, 8);
6104 let a: f32x8 = a.as_f32x8();
6105 let b: f32x8 = b.as_f32x8();
6106 let c: i32x8 = c.as_i32x8();
    let r: f32x8 = vfixupimmps256(a, b, c, IMM8, k);
    transmute(r)
6109}
6110
6111/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6112///
6113/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fixupimm_ps&expand=2498)
6114#[inline]
6115#[target_feature(enable = "avx512f,avx512vl")]
6116#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6117#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6118#[rustc_legacy_const_generics(4)]
6119pub unsafe fn _mm256_maskz_fixupimm_ps<const IMM8: i32>(
6120 k: __mmask8,
6121 a: __m256,
6122 b: __m256,
6123 c: __m256i,
6124) -> __m256 {
6125 static_assert_uimm_bits!(IMM8, 8);
6126 let a: f32x8 = a.as_f32x8();
6127 let b: f32x8 = b.as_f32x8();
6128 let c: i32x8 = c.as_i32x8();
    let r: f32x8 = vfixupimmpsz256(a, b, c, IMM8, k);
    transmute(r)
6131}
6132
6133/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6134///
6135/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fixupimm_ps&expand=2493)
6136#[inline]
6137#[target_feature(enable = "avx512f,avx512vl")]
6138#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6139#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6140#[rustc_legacy_const_generics(3)]
6141pub unsafe fn _mm_fixupimm_ps<const IMM8: i32>(a: __m128, b: __m128, c: __m128i) -> __m128 {
6142 static_assert_uimm_bits!(IMM8, 8);
6143 let a: f32x4 = a.as_f32x4();
6144 let b: f32x4 = b.as_f32x4();
6145 let c: i32x4 = c.as_i32x4();
    let r: f32x4 = vfixupimmps128(a, b, c, IMM8, 0b00001111);
    transmute(r)
6148}
6149
6150/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6151///
6152/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fixupimm_ps&expand=2494)
6153#[inline]
6154#[target_feature(enable = "avx512f,avx512vl")]
6155#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6156#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6157#[rustc_legacy_const_generics(4)]
6158pub unsafe fn _mm_mask_fixupimm_ps<const IMM8: i32>(
6159 a: __m128,
6160 k: __mmask8,
6161 b: __m128,
6162 c: __m128i,
6163) -> __m128 {
6164 static_assert_uimm_bits!(IMM8, 8);
6165 let a: f32x4 = a.as_f32x4();
6166 let b: f32x4 = b.as_f32x4();
6167 let c: i32x4 = c.as_i32x4();
    let r: f32x4 = vfixupimmps128(a, b, c, IMM8, k);
    transmute(r)
6170}
6171
6172/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6173///
6174/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fixupimm_ps&expand=2495)
6175#[inline]
6176#[target_feature(enable = "avx512f,avx512vl")]
6177#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6178#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6179#[rustc_legacy_const_generics(4)]
6180pub unsafe fn _mm_maskz_fixupimm_ps<const IMM8: i32>(
6181 k: __mmask8,
6182 a: __m128,
6183 b: __m128,
6184 c: __m128i,
6185) -> __m128 {
6186 static_assert_uimm_bits!(IMM8, 8);
6187 let a: f32x4 = a.as_f32x4();
6188 let b: f32x4 = b.as_f32x4();
6189 let c: i32x4 = c.as_i32x4();
    let r: f32x4 = vfixupimmpsz128(a, b, c, IMM8, k);
    transmute(r)
6192}
6193
6194/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6195///
6196/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_pd&expand=2490)
6197#[inline]
6198#[target_feature(enable = "avx512f")]
6199#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6200#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6201#[rustc_legacy_const_generics(3)]
6202pub unsafe fn _mm512_fixupimm_pd<const IMM8: i32>(a: __m512d, b: __m512d, c: __m512i) -> __m512d {
6203 static_assert_uimm_bits!(IMM8, 8);
6204 let a: f64x8 = a.as_f64x8();
6205 let b: f64x8 = b.as_f64x8();
6206 let c: i64x8 = c.as_i64x8();
    let r: f64x8 = vfixupimmpd(a, b, c, IMM8, 0b11111111, _MM_FROUND_CUR_DIRECTION);
    transmute(r)
6209}
6210
6211/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6212///
6213/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_pd&expand=2491)
6214#[inline]
6215#[target_feature(enable = "avx512f")]
6216#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6217#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6218#[rustc_legacy_const_generics(4)]
6219pub unsafe fn _mm512_mask_fixupimm_pd<const IMM8: i32>(
6220 a: __m512d,
6221 k: __mmask8,
6222 b: __m512d,
6223 c: __m512i,
6224) -> __m512d {
6225 static_assert_uimm_bits!(IMM8, 8);
6226 let a: f64x8 = a.as_f64x8();
6227 let b: f64x8 = b.as_f64x8();
6228 let c: i64x8 = c.as_i64x8();
    let r: f64x8 = vfixupimmpd(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
    transmute(r)
6231}
6232
6233/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6234///
6235/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_pd&expand=2492)
6236#[inline]
6237#[target_feature(enable = "avx512f")]
6238#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6239#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6240#[rustc_legacy_const_generics(4)]
6241pub unsafe fn _mm512_maskz_fixupimm_pd<const IMM8: i32>(
6242 k: __mmask8,
6243 a: __m512d,
6244 b: __m512d,
6245 c: __m512i,
6246) -> __m512d {
6247 static_assert_uimm_bits!(IMM8, 8);
6248 let a: f64x8 = a.as_f64x8();
6249 let b: f64x8 = b.as_f64x8();
6250 let c: i64x8 = c.as_i64x8();
    let r: f64x8 = vfixupimmpdz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
    transmute(r)
6253}
6254
6255/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6256///
6257/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fixupimm_pd&expand=2487)
6258#[inline]
6259#[target_feature(enable = "avx512f,avx512vl")]
6260#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6261#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6262#[rustc_legacy_const_generics(3)]
6263pub unsafe fn _mm256_fixupimm_pd<const IMM8: i32>(a: __m256d, b: __m256d, c: __m256i) -> __m256d {
6264 static_assert_uimm_bits!(IMM8, 8);
6265 let a: f64x4 = a.as_f64x4();
6266 let b: f64x4 = b.as_f64x4();
6267 let c: i64x4 = c.as_i64x4();
    let r: f64x4 = vfixupimmpd256(a, b, c, IMM8, 0b00001111);
    transmute(r)
6270}
6271
6272/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6273///
6274/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fixupimm_pd&expand=2488)
6275#[inline]
6276#[target_feature(enable = "avx512f,avx512vl")]
6277#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6278#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6279#[rustc_legacy_const_generics(4)]
6280pub unsafe fn _mm256_mask_fixupimm_pd<const IMM8: i32>(
6281 a: __m256d,
6282 k: __mmask8,
6283 b: __m256d,
6284 c: __m256i,
6285) -> __m256d {
6286 static_assert_uimm_bits!(IMM8, 8);
6287 let a: f64x4 = a.as_f64x4();
6288 let b: f64x4 = b.as_f64x4();
6289 let c: i64x4 = c.as_i64x4();
    let r: f64x4 = vfixupimmpd256(a, b, c, IMM8, k);
    transmute(r)
6292}
6293
6294/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6295///
6296/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fixupimm_pd&expand=2489)
6297#[inline]
6298#[target_feature(enable = "avx512f,avx512vl")]
6299#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6300#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6301#[rustc_legacy_const_generics(4)]
6302pub unsafe fn _mm256_maskz_fixupimm_pd<const IMM8: i32>(
6303 k: __mmask8,
6304 a: __m256d,
6305 b: __m256d,
6306 c: __m256i,
6307) -> __m256d {
6308 static_assert_uimm_bits!(IMM8, 8);
6309 let a: f64x4 = a.as_f64x4();
6310 let b: f64x4 = b.as_f64x4();
6311 let c: i64x4 = c.as_i64x4();
    let r: f64x4 = vfixupimmpdz256(a, b, c, IMM8, k);
    transmute(r)
6314}
6315
6316/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6317///
6318/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fixupimm_pd&expand=2484)
6319#[inline]
6320#[target_feature(enable = "avx512f,avx512vl")]
6321#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6322#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6323#[rustc_legacy_const_generics(3)]
6324pub unsafe fn _mm_fixupimm_pd<const IMM8: i32>(a: __m128d, b: __m128d, c: __m128i) -> __m128d {
6325 static_assert_uimm_bits!(IMM8, 8);
6326 let a: f64x2 = a.as_f64x2();
6327 let b: f64x2 = b.as_f64x2();
6328 let c: i64x2 = c.as_i64x2();
    let r: f64x2 = vfixupimmpd128(a, b, c, IMM8, 0b00000011);
    transmute(r)
6331}
6332
6333/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6334///
6335/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fixupimm_pd&expand=2485)
6336#[inline]
6337#[target_feature(enable = "avx512f,avx512vl")]
6338#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6339#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6340#[rustc_legacy_const_generics(4)]
6341pub unsafe fn _mm_mask_fixupimm_pd<const IMM8: i32>(
6342 a: __m128d,
6343 k: __mmask8,
6344 b: __m128d,
6345 c: __m128i,
6346) -> __m128d {
6347 static_assert_uimm_bits!(IMM8, 8);
6348 let a: f64x2 = a.as_f64x2();
6349 let b: f64x2 = b.as_f64x2();
6350 let c: i64x2 = c.as_i64x2();
    let r: f64x2 = vfixupimmpd128(a, b, c, IMM8, k);
    transmute(r)
6353}
6354
6355/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6356///
6357/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fixupimm_pd&expand=2486)
6358#[inline]
6359#[target_feature(enable = "avx512f,avx512vl")]
6360#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6361#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6362#[rustc_legacy_const_generics(4)]
6363pub unsafe fn _mm_maskz_fixupimm_pd<const IMM8: i32>(
6364 k: __mmask8,
6365 a: __m128d,
6366 b: __m128d,
6367 c: __m128i,
6368) -> __m128d {
6369 static_assert_uimm_bits!(IMM8, 8);
6370 let a: f64x2 = a.as_f64x2();
6371 let b: f64x2 = b.as_f64x2();
6372 let c: i64x2 = c.as_i64x2();
    let r: f64x2 = vfixupimmpdz128(a, b, c, IMM8, k);
    transmute(r)
6375}
6376
6377/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
6378///
6379/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ternarylogic_epi32&expand=5867)
6380#[inline]
6381#[target_feature(enable = "avx512f")]
6382#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6383#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6384#[rustc_legacy_const_generics(3)]
6385pub unsafe fn _mm512_ternarylogic_epi32<const IMM8: i32>(
6386 a: __m512i,
6387 b: __m512i,
6388 c: __m512i,
6389) -> __m512i {
6390 static_assert_uimm_bits!(IMM8, 8);
6391 let a: i32x16 = a.as_i32x16();
6392 let b: i32x16 = b.as_i32x16();
6393 let c: i32x16 = c.as_i32x16();
6394 let r: i32x16 = vpternlogd(a, b, c, IMM8);
    transmute(r)
6396}
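
// Illustrative sketch, not part of the public API: IMM8 is an eight-entry truth
// table indexed by (a_bit << 2) | (b_bit << 1) | c_bit, so 0x96 encodes a ^ b ^ c
// and 0xE8 encodes the bitwise majority function. These constants are widely used
// encodings, but verify them against the ISA reference before relying on them.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn ternarylogic_sketch(a: __m512i, b: __m512i, c: __m512i) -> (__m512i, __m512i) {
    let xor3 = _mm512_ternarylogic_epi32::<0x96>(a, b, c);
    let majority = _mm512_ternarylogic_epi32::<0xE8>(a, b, c);
    (xor3, majority)
}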
6397
6398/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 32-bit granularity (32-bit elements are copied from src when the corresponding mask bit is not set).
6399///
6400/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ternarylogic_epi32&expand=5865)
6401#[inline]
6402#[target_feature(enable = "avx512f")]
6403#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6404#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6405#[rustc_legacy_const_generics(4)]
6406pub unsafe fn _mm512_mask_ternarylogic_epi32<const IMM8: i32>(
6407 src: __m512i,
6408 k: __mmask16,
6409 a: __m512i,
6410 b: __m512i,
6411) -> __m512i {
6412 static_assert_uimm_bits!(IMM8, 8);
6413 let src: i32x16 = src.as_i32x16();
6414 let a: i32x16 = a.as_i32x16();
6415 let b: i32x16 = b.as_i32x16();
    let r: i32x16 = vpternlogd(src, a, b, IMM8);
    transmute(simd_select_bitmask(k, r, src))
6418}
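
// Illustrative sketch, not part of the public API: in the writemask form the three
// truth-table inputs are `src`, `a`, `b` (in that order), and lanes with a clear
// mask bit keep `src`. The mask and IMM8 below are arbitrary assumptions.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn mask_ternarylogic_sketch(src: __m512i, a: __m512i, b: __m512i) -> __m512i {
    // 0x96 = three-way XOR of src, a and b on the lanes selected by the mask.
    _mm512_mask_ternarylogic_epi32::<0x96>(src, 0b10101010_01010101, a, b)
}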
6419
6420/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set).
6421///
6422/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ternarylogic_epi32&expand=5866)
6423#[inline]
6424#[target_feature(enable = "avx512f")]
6425#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6426#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6427#[rustc_legacy_const_generics(4)]
6428pub unsafe fn _mm512_maskz_ternarylogic_epi32<const IMM8: i32>(
6429 k: __mmask16,
6430 a: __m512i,
6431 b: __m512i,
6432 c: __m512i,
6433) -> __m512i {
6434 static_assert_uimm_bits!(IMM8, 8);
6435 let a: i32x16 = a.as_i32x16();
6436 let b: i32x16 = b.as_i32x16();
6437 let c: i32x16 = c.as_i32x16();
6438 let r: i32x16 = vpternlogd(a, b, c, IMM8);
6439 let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, r, zero))
6441}
6442
6443/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
6444///
6445/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ternarylogic_epi32&expand=5864)
6446#[inline]
6447#[target_feature(enable = "avx512f,avx512vl")]
6448#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6449#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6450#[rustc_legacy_const_generics(3)]
6451pub unsafe fn _mm256_ternarylogic_epi32<const IMM8: i32>(
6452 a: __m256i,
6453 b: __m256i,
6454 c: __m256i,
6455) -> __m256i {
6456 static_assert_uimm_bits!(IMM8, 8);
6457 let a: i32x8 = a.as_i32x8();
6458 let b: i32x8 = b.as_i32x8();
6459 let c: i32x8 = c.as_i32x8();
6460 let r: i32x8 = vpternlogd256(a, b, c, IMM8);
    transmute(r)
6462}
6463
6464/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 32-bit granularity (32-bit elements are copied from src when the corresponding mask bit is not set).
6465///
6466/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ternarylogic_epi32&expand=5862)
6467#[inline]
6468#[target_feature(enable = "avx512f,avx512vl")]
6469#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6470#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6471#[rustc_legacy_const_generics(4)]
6472pub unsafe fn _mm256_mask_ternarylogic_epi32<const IMM8: i32>(
6473 src: __m256i,
6474 k: __mmask8,
6475 a: __m256i,
6476 b: __m256i,
6477) -> __m256i {
6478 static_assert_uimm_bits!(IMM8, 8);
6479 let src: i32x8 = src.as_i32x8();
6480 let a: i32x8 = a.as_i32x8();
6481 let b: i32x8 = b.as_i32x8();
    let r: i32x8 = vpternlogd256(src, a, b, IMM8);
    transmute(simd_select_bitmask(k, r, src))
6484}
6485
6486/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set).
6487///
6488/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ternarylogic_epi32&expand=5863)
6489#[inline]
6490#[target_feature(enable = "avx512f,avx512vl")]
6491#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6492#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6493#[rustc_legacy_const_generics(4)]
6494pub unsafe fn _mm256_maskz_ternarylogic_epi32<const IMM8: i32>(
6495 k: __mmask8,
6496 a: __m256i,
6497 b: __m256i,
6498 c: __m256i,
6499) -> __m256i {
6500 static_assert_uimm_bits!(IMM8, 8);
6501 let a: i32x8 = a.as_i32x8();
6502 let b: i32x8 = b.as_i32x8();
6503 let c: i32x8 = c.as_i32x8();
6504 let r: i32x8 = vpternlogd256(a, b, c, IMM8);
6505 let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
    transmute(simd_select_bitmask(k, r, zero))
6507}
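
// A scalar sketch (hypothetical, for exposition only) of the per-lane selection that the
// writemask variants above perform: bit `i` of the mask chooses between the freshly
// computed lane and the corresponding `src` lane. The zeromask variants are the same
// selection with `src` replaced by all zeros.
#[cfg(test)]
#[allow(dead_code)]
fn writemask_select_epi32(k: u8, result: [i32; 8], src: [i32; 8]) -> [i32; 8] {
    let mut dst = [0i32; 8];
    for i in 0..8 {
        dst[i] = if (k >> i) & 1 == 1 { result[i] } else { src[i] };
    }
    dst
}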
6508
6509/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
6510///
6511/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ternarylogic_epi32&expand=5861)
6512#[inline]
6513#[target_feature(enable = "avx512f,avx512vl")]
6514#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6515#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6516#[rustc_legacy_const_generics(3)]
6517pub unsafe fn _mm_ternarylogic_epi32<const IMM8: i32>(
6518 a: __m128i,
6519 b: __m128i,
6520 c: __m128i,
6521) -> __m128i {
6522 static_assert_uimm_bits!(IMM8, 8);
6523 let a: i32x4 = a.as_i32x4();
6524 let b: i32x4 = b.as_i32x4();
6525 let c: i32x4 = c.as_i32x4();
6526 let r: i32x4 = vpternlogd128(a, b, c, IMM8);
    transmute(r)
6528}
6529
6530/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 32-bit granularity (32-bit elements are copied from src when the corresponding mask bit is not set).
6531///
6532/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ternarylogic_epi32&expand=5859)
6533#[inline]
6534#[target_feature(enable = "avx512f,avx512vl")]
6535#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6536#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6537#[rustc_legacy_const_generics(4)]
6538pub unsafe fn _mm_mask_ternarylogic_epi32<const IMM8: i32>(
6539 src: __m128i,
6540 k: __mmask8,
6541 a: __m128i,
6542 b: __m128i,
6543) -> __m128i {
6544 static_assert_uimm_bits!(IMM8, 8);
6545 let src: i32x4 = src.as_i32x4();
6546 let a: i32x4 = a.as_i32x4();
6547 let b: i32x4 = b.as_i32x4();
    let r: i32x4 = vpternlogd128(src, a, b, IMM8);
    transmute(simd_select_bitmask(k, r, src))
6550}
6551
6552/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set).
6553///
6554/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ternarylogic_epi32&expand=5860)
6555#[inline]
6556#[target_feature(enable = "avx512f,avx512vl")]
6557#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6558#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6559#[rustc_legacy_const_generics(4)]
6560pub unsafe fn _mm_maskz_ternarylogic_epi32<const IMM8: i32>(
6561 k: __mmask8,
6562 a: __m128i,
6563 b: __m128i,
6564 c: __m128i,
6565) -> __m128i {
6566 static_assert_uimm_bits!(IMM8, 8);
6567 let a: i32x4 = a.as_i32x4();
6568 let b: i32x4 = b.as_i32x4();
6569 let c: i32x4 = c.as_i32x4();
6570 let r: i32x4 = vpternlogd128(a, b, c, IMM8);
6571 let zero: i32x4 = _mm_setzero_si128().as_i32x4();
    transmute(simd_select_bitmask(k, r, zero))
6573}
6574
6575/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
6576///
6577/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ternarylogic_epi64&expand=5876)
6578#[inline]
6579#[target_feature(enable = "avx512f")]
6580#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6581#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6582#[rustc_legacy_const_generics(3)]
6583pub unsafe fn _mm512_ternarylogic_epi64<const IMM8: i32>(
6584 a: __m512i,
6585 b: __m512i,
6586 c: __m512i,
6587) -> __m512i {
6588 static_assert_uimm_bits!(IMM8, 8);
6589 let a: i64x8 = a.as_i64x8();
6590 let b: i64x8 = b.as_i64x8();
6591 let c: i64x8 = c.as_i64x8();
6592 let r: i64x8 = vpternlogq(a, b, c, IMM8);
    transmute(r)
6594}
6595
6596/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 64-bit granularity (64-bit elements are copied from src when the corresponding mask bit is not set).
6597///
6598/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ternarylogic_epi64&expand=5874)
6599#[inline]
6600#[target_feature(enable = "avx512f")]
6601#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6602#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6603#[rustc_legacy_const_generics(4)]
6604pub unsafe fn _mm512_mask_ternarylogic_epi64<const IMM8: i32>(
6605 src: __m512i,
6606 k: __mmask8,
6607 a: __m512i,
6608 b: __m512i,
6609) -> __m512i {
6610 static_assert_uimm_bits!(IMM8, 8);
6611 let src: i64x8 = src.as_i64x8();
6612 let a: i64x8 = a.as_i64x8();
6613 let b: i64x8 = b.as_i64x8();
    let r: i64x8 = vpternlogq(src, a, b, IMM8);
    transmute(simd_select_bitmask(k, r, src))
6616}
6617
6618/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set).
6619///
6620/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ternarylogic_epi64&expand=5875)
6621#[inline]
6622#[target_feature(enable = "avx512f")]
6623#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6624#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6625#[rustc_legacy_const_generics(4)]
6626pub unsafe fn _mm512_maskz_ternarylogic_epi64<const IMM8: i32>(
6627 k: __mmask8,
6628 a: __m512i,
6629 b: __m512i,
6630 c: __m512i,
6631) -> __m512i {
6632 static_assert_uimm_bits!(IMM8, 8);
6633 let a: i64x8 = a.as_i64x8();
6634 let b: i64x8 = b.as_i64x8();
6635 let c: i64x8 = c.as_i64x8();
6636 let r: i64x8 = vpternlogq(a, b, c, IMM8);
6637 let zero: i64x8 = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, r, zero))
6639}
6640
6641/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
6642///
6643/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ternarylogic_epi64&expand=5873)
6644#[inline]
6645#[target_feature(enable = "avx512f,avx512vl")]
6646#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6647#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6648#[rustc_legacy_const_generics(3)]
6649pub unsafe fn _mm256_ternarylogic_epi64<const IMM8: i32>(
6650 a: __m256i,
6651 b: __m256i,
6652 c: __m256i,
6653) -> __m256i {
6654 static_assert_uimm_bits!(IMM8, 8);
6655 let a: i64x4 = a.as_i64x4();
6656 let b: i64x4 = b.as_i64x4();
6657 let c: i64x4 = c.as_i64x4();
6658 let r: i64x4 = vpternlogq256(a, b, c, IMM8);
    transmute(r)
6660}
6661
6662/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 64-bit granularity (64-bit elements are copied from src when the corresponding mask bit is not set).
6663///
6664/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ternarylogic_epi64&expand=5871)
6665#[inline]
6666#[target_feature(enable = "avx512f,avx512vl")]
6667#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6668#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6669#[rustc_legacy_const_generics(4)]
6670pub unsafe fn _mm256_mask_ternarylogic_epi64<const IMM8: i32>(
6671 src: __m256i,
6672 k: __mmask8,
6673 a: __m256i,
6674 b: __m256i,
6675) -> __m256i {
6676 static_assert_uimm_bits!(IMM8, 8);
6677 let src: i64x4 = src.as_i64x4();
6678 let a: i64x4 = a.as_i64x4();
6679 let b: i64x4 = b.as_i64x4();
    let r: i64x4 = vpternlogq256(src, a, b, IMM8);
    transmute(simd_select_bitmask(k, r, src))
6682}
6683
6684/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set).
6685///
6686/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ternarylogic_epi64&expand=5872)
6687#[inline]
6688#[target_feature(enable = "avx512f,avx512vl")]
6689#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6690#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6691#[rustc_legacy_const_generics(4)]
6692pub unsafe fn _mm256_maskz_ternarylogic_epi64<const IMM8: i32>(
6693 k: __mmask8,
6694 a: __m256i,
6695 b: __m256i,
6696 c: __m256i,
6697) -> __m256i {
6698 static_assert_uimm_bits!(IMM8, 8);
6699 let a: i64x4 = a.as_i64x4();
6700 let b: i64x4 = b.as_i64x4();
6701 let c: i64x4 = c.as_i64x4();
6702 let r: i64x4 = vpternlogq256(a, b, c, IMM8);
6703 let zero: i64x4 = _mm256_setzero_si256().as_i64x4();
    transmute(simd_select_bitmask(k, r, zero))
6705}
6706
6707/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
6708///
6709/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ternarylogic_epi64&expand=5870)
6710#[inline]
6711#[target_feature(enable = "avx512f,avx512vl")]
6712#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6713#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6714#[rustc_legacy_const_generics(3)]
6715pub unsafe fn _mm_ternarylogic_epi64<const IMM8: i32>(
6716 a: __m128i,
6717 b: __m128i,
6718 c: __m128i,
6719) -> __m128i {
6720 static_assert_uimm_bits!(IMM8, 8);
6721 let a: i64x2 = a.as_i64x2();
6722 let b: i64x2 = b.as_i64x2();
6723 let c: i64x2 = c.as_i64x2();
6724 let r: i64x2 = vpternlogq128(a, b, c, IMM8);
    transmute(r)
6726}
6727
6728/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 64-bit granularity (64-bit elements are copied from src when the corresponding mask bit is not set).
6729///
6730/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ternarylogic_epi64&expand=5868)
6731#[inline]
6732#[target_feature(enable = "avx512f,avx512vl")]
6733#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6734#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6735#[rustc_legacy_const_generics(4)]
6736pub unsafe fn _mm_mask_ternarylogic_epi64<const IMM8: i32>(
6737 src: __m128i,
6738 k: __mmask8,
6739 a: __m128i,
6740 b: __m128i,
6741) -> __m128i {
6742 static_assert_uimm_bits!(IMM8, 8);
6743 let src: i64x2 = src.as_i64x2();
6744 let a: i64x2 = a.as_i64x2();
6745 let b: i64x2 = b.as_i64x2();
    let r: i64x2 = vpternlogq128(src, a, b, IMM8);
    transmute(simd_select_bitmask(k, r, src))
6748}
6749
6750/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set).
6751///
6752/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ternarylogic_epi64&expand=5869)
6753#[inline]
6754#[target_feature(enable = "avx512f,avx512vl")]
6755#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6756#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6757#[rustc_legacy_const_generics(4)]
6758pub unsafe fn _mm_maskz_ternarylogic_epi64<const IMM8: i32>(
6759 k: __mmask8,
6760 a: __m128i,
6761 b: __m128i,
6762 c: __m128i,
6763) -> __m128i {
6764 static_assert_uimm_bits!(IMM8, 8);
6765 let a: i64x2 = a.as_i64x2();
6766 let b: i64x2 = b.as_i64x2();
6767 let c: i64x2 = c.as_i64x2();
6768 let r: i64x2 = vpternlogq128(a, b, c, IMM8);
6769 let zero: i64x2 = _mm_setzero_si128().as_i64x2();
    transmute(simd_select_bitmask(k, r, zero))
6771}
6772
6773/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.
6774/// The mantissa is normalized to the interval specified by interv, which can take the following values:
6775/// _MM_MANT_NORM_1_2 // interval [1, 2)
6776/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
6777/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
6778/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
6779/// The sign is determined by sc which can take the following values:
6780/// _MM_MANT_SIGN_src // sign = sign(src)
6781/// _MM_MANT_SIGN_zero // sign = 0
6782/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
6783///
6784/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_ps&expand=2880)
6785#[inline]
6786#[target_feature(enable = "avx512f")]
6787#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6788#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
6789#[rustc_legacy_const_generics(1, 2)]
6790pub unsafe fn _mm512_getmant_ps<
6791 const NORM: _MM_MANTISSA_NORM_ENUM,
6792 const SIGN: _MM_MANTISSA_SIGN_ENUM,
6793>(
6794 a: __m512,
6795) -> __m512 {
6796 static_assert_uimm_bits!(NORM, 4);
6797 static_assert_uimm_bits!(SIGN, 2);
6798 let a: f32x16 = a.as_f32x16();
6799 let zero: f32x16 = _mm512_setzero_ps().as_f32x16();
    let r: f32x16 = vgetmantps(
        a,
        SIGN << 2 | NORM,
        zero,
        0b11111111_11111111,
        _MM_FROUND_CUR_DIRECTION,
    );
    transmute(r)
6808}
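
// A simplified scalar sketch (hypothetical helper; it ignores zeros, NaNs, infinities
// and denormals) of what getmant computes for the common _MM_MANT_NORM_1_2 /
// _MM_MANT_SIGN_src case: keep the sign and mantissa bits and force the biased exponent
// to 127 so the magnitude falls in [1.0, 2.0). Note how the immediate built above packs
// the sign control into bits 3:2 and the interval selector into bits 1:0, hence
// `SIGN << 2 | NORM`.
#[cfg(test)]
#[allow(dead_code)]
fn getmant_1_2_sign_src(x: f32) -> f32 {
    let bits = x.to_bits();
    // 0x807F_FFFF keeps the sign bit and the 23 mantissa bits; 127 << 23 is exponent 0.
    f32::from_bits((bits & 0x807F_FFFF) | (127u32 << 23))
}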
6809
6810/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
6811/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
6812/// _MM_MANT_NORM_1_2 // interval [1, 2)\
6813/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
6814/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
6815/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
6816/// The sign is determined by sc which can take the following values:\
6817/// _MM_MANT_SIGN_src // sign = sign(src)\
6818/// _MM_MANT_SIGN_zero // sign = 0\
6819/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
6820///
6821/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_ps&expand=2881)
6822#[inline]
6823#[target_feature(enable = "avx512f")]
6824#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6825#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
6826#[rustc_legacy_const_generics(3, 4)]
6827pub unsafe fn _mm512_mask_getmant_ps<
6828 const NORM: _MM_MANTISSA_NORM_ENUM,
6829 const SIGN: _MM_MANTISSA_SIGN_ENUM,
6830>(
6831 src: __m512,
6832 k: __mmask16,
6833 a: __m512,
6834) -> __m512 {
6835 static_assert_uimm_bits!(NORM, 4);
6836 static_assert_uimm_bits!(SIGN, 2);
6837 let a: f32x16 = a.as_f32x16();
6838 let src: f32x16 = src.as_f32x16();
    let r: f32x16 = vgetmantps(a, SIGN << 2 | NORM, src, k, _MM_FROUND_CUR_DIRECTION);
    transmute(r)
6841}
6842
6843/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
6844/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
6845/// _MM_MANT_NORM_1_2 // interval [1, 2)\
6846/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
6847/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
6848/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
6849/// The sign is determined by sc which can take the following values:\
6850/// _MM_MANT_SIGN_src // sign = sign(src)\
6851/// _MM_MANT_SIGN_zero // sign = 0\
6852/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
6853///
6854/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_ps&expand=2882)
6855#[inline]
6856#[target_feature(enable = "avx512f")]
6857#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6858#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
6859#[rustc_legacy_const_generics(2, 3)]
6860pub unsafe fn _mm512_maskz_getmant_ps<
6861 const NORM: _MM_MANTISSA_NORM_ENUM,
6862 const SIGN: _MM_MANTISSA_SIGN_ENUM,
6863>(
6864 k: __mmask16,
6865 a: __m512,
6866) -> __m512 {
6867 static_assert_uimm_bits!(NORM, 4);
6868 static_assert_uimm_bits!(SIGN, 2);
6869 let a: f32x16 = a.as_f32x16();
6870 let zero: f32x16 = _mm512_setzero_ps().as_f32x16();
    let r: f32x16 = vgetmantps(a, SIGN << 2 | NORM, zero, k, _MM_FROUND_CUR_DIRECTION);
    transmute(r)
6873}
6874
6875/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.
6876/// The mantissa is normalized to the interval specified by interv, which can take the following values:
6877/// _MM_MANT_NORM_1_2 // interval [1, 2)
6878/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
6879/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
6880/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
6881/// The sign is determined by sc which can take the following values:
6882/// _MM_MANT_SIGN_src // sign = sign(src)
6883/// _MM_MANT_SIGN_zero // sign = 0
6884/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
6885///
6886/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getmant_ps&expand=2877)
6887#[inline]
6888#[target_feature(enable = "avx512f,avx512vl")]
6889#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6890#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
6891#[rustc_legacy_const_generics(1, 2)]
6892pub unsafe fn _mm256_getmant_ps<
6893 const NORM: _MM_MANTISSA_NORM_ENUM,
6894 const SIGN: _MM_MANTISSA_SIGN_ENUM,
6895>(
6896 a: __m256,
6897) -> __m256 {
6898 static_assert_uimm_bits!(NORM, 4);
6899 static_assert_uimm_bits!(SIGN, 2);
6900 let a: f32x8 = a.as_f32x8();
6901 let zero: f32x8 = _mm256_setzero_ps().as_f32x8();
    let r: f32x8 = vgetmantps256(a, SIGN << 2 | NORM, zero, 0b11111111);
    transmute(r)
6904}
6905
6906/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
6907/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
6908/// _MM_MANT_NORM_1_2 // interval [1, 2)\
6909/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
6910/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
6911/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
6912/// The sign is determined by sc which can take the following values:\
6913/// _MM_MANT_SIGN_src // sign = sign(src)\
6914/// _MM_MANT_SIGN_zero // sign = 0\
6915/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
6916///
6917/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getmant_ps&expand=2878)
6918#[inline]
6919#[target_feature(enable = "avx512f,avx512vl")]
6920#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6921#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
6922#[rustc_legacy_const_generics(3, 4)]
6923pub unsafe fn _mm256_mask_getmant_ps<
6924 const NORM: _MM_MANTISSA_NORM_ENUM,
6925 const SIGN: _MM_MANTISSA_SIGN_ENUM,
6926>(
6927 src: __m256,
6928 k: __mmask8,
6929 a: __m256,
6930) -> __m256 {
6931 static_assert_uimm_bits!(NORM, 4);
6932 static_assert_uimm_bits!(SIGN, 2);
6933 let a: f32x8 = a.as_f32x8();
6934 let src: f32x8 = src.as_f32x8();
    let r: f32x8 = vgetmantps256(a, SIGN << 2 | NORM, src, k);
    transmute(r)
6937}
6938
6939/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
6940/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
6941/// _MM_MANT_NORM_1_2 // interval [1, 2)\
6942/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
6943/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
6944/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
6945/// The sign is determined by sc which can take the following values:\
6946/// _MM_MANT_SIGN_src // sign = sign(src)\
6947/// _MM_MANT_SIGN_zero // sign = 0\
6948/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
6949///
6950/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getmant_ps&expand=2879)
6951#[inline]
6952#[target_feature(enable = "avx512f,avx512vl")]
6953#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6954#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
6955#[rustc_legacy_const_generics(2, 3)]
6956pub unsafe fn _mm256_maskz_getmant_ps<
6957 const NORM: _MM_MANTISSA_NORM_ENUM,
6958 const SIGN: _MM_MANTISSA_SIGN_ENUM,
6959>(
6960 k: __mmask8,
6961 a: __m256,
6962) -> __m256 {
6963 static_assert_uimm_bits!(NORM, 4);
6964 static_assert_uimm_bits!(SIGN, 2);
6965 let a: f32x8 = a.as_f32x8();
6966 let zero: f32x8 = _mm256_setzero_ps().as_f32x8();
    let r: f32x8 = vgetmantps256(a, SIGN << 2 | NORM, zero, k);
    transmute(r)
6969}
6970
6971/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.
6972/// The mantissa is normalized to the interval specified by interv, which can take the following values:
6973/// _MM_MANT_NORM_1_2 // interval [1, 2)
6974/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
6975/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
6976/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
6977/// The sign is determined by sc which can take the following values:
6978/// _MM_MANT_SIGN_src // sign = sign(src)
6979/// _MM_MANT_SIGN_zero // sign = 0
6980/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
6981///
6982/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getmant_ps&expand=2874)
6983#[inline]
6984#[target_feature(enable = "avx512f,avx512vl")]
6985#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6986#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
6987#[rustc_legacy_const_generics(1, 2)]
6988pub unsafe fn _mm_getmant_ps<
6989 const NORM: _MM_MANTISSA_NORM_ENUM,
6990 const SIGN: _MM_MANTISSA_SIGN_ENUM,
6991>(
6992 a: __m128,
6993) -> __m128 {
6994 static_assert_uimm_bits!(NORM, 4);
6995 static_assert_uimm_bits!(SIGN, 2);
6996 let a: f32x4 = a.as_f32x4();
6997 let zero: f32x4 = _mm_setzero_ps().as_f32x4();
    let r: f32x4 = vgetmantps128(a, SIGN << 2 | NORM, zero, 0b00001111);
    transmute(r)
7000}
7001
7002/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7003/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7004/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7005/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7006/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7007/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7008/// The sign is determined by sc which can take the following values:\
7009/// _MM_MANT_SIGN_src // sign = sign(src)\
7010/// _MM_MANT_SIGN_zero // sign = 0\
7011/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7012///
7013/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getmant_ps&expand=2875)
7014#[inline]
7015#[target_feature(enable = "avx512f,avx512vl")]
7016#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7017#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7018#[rustc_legacy_const_generics(3, 4)]
7019pub unsafe fn _mm_mask_getmant_ps<
7020 const NORM: _MM_MANTISSA_NORM_ENUM,
7021 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7022>(
7023 src: __m128,
7024 k: __mmask8,
7025 a: __m128,
7026) -> __m128 {
7027 static_assert_uimm_bits!(NORM, 4);
7028 static_assert_uimm_bits!(SIGN, 2);
7029 let a: f32x4 = a.as_f32x4();
7030 let src: f32x4 = src.as_f32x4();
    let r: f32x4 = vgetmantps128(a, SIGN << 2 | NORM, src, k);
    transmute(r)
7033}
7034
7035/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7036/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7037/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7038/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7039/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7040/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7041/// The sign is determined by sc which can take the following values:\
7042/// _MM_MANT_SIGN_src // sign = sign(src)\
7043/// _MM_MANT_SIGN_zero // sign = 0\
7044/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7045///
7046/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getmant_ps&expand=2876)
7047#[inline]
7048#[target_feature(enable = "avx512f,avx512vl")]
7049#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7050#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7051#[rustc_legacy_const_generics(2, 3)]
7052pub unsafe fn _mm_maskz_getmant_ps<
7053 const NORM: _MM_MANTISSA_NORM_ENUM,
7054 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7055>(
7056 k: __mmask8,
7057 a: __m128,
7058) -> __m128 {
7059 static_assert_uimm_bits!(NORM, 4);
7060 static_assert_uimm_bits!(SIGN, 2);
7061 let a: f32x4 = a.as_f32x4();
7062 let zero: f32x4 = _mm_setzero_ps().as_f32x4();
    let r: f32x4 = vgetmantps128(a, SIGN << 2 | NORM, zero, k);
    transmute(r)
7065}
7066
7067/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7068/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7069/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7070/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7071/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7072/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7073/// The sign is determined by sc which can take the following values:\
7074/// _MM_MANT_SIGN_src // sign = sign(src)\
7075/// _MM_MANT_SIGN_zero // sign = 0\
7076/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7077///
7078/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_pd&expand=2871)
7079#[inline]
7080#[target_feature(enable = "avx512f")]
7081#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7082#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7083#[rustc_legacy_const_generics(1, 2)]
7084pub unsafe fn _mm512_getmant_pd<
7085 const NORM: _MM_MANTISSA_NORM_ENUM,
7086 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7087>(
7088 a: __m512d,
7089) -> __m512d {
7090 static_assert_uimm_bits!(NORM, 4);
7091 static_assert_uimm_bits!(SIGN, 2);
7092 let a: f64x8 = a.as_f64x8();
7093 let zero: f64x8 = _mm512_setzero_pd().as_f64x8();
    let r: f64x8 = vgetmantpd(
        a,
        SIGN << 2 | NORM,
        zero,
        0b11111111,
        _MM_FROUND_CUR_DIRECTION,
    );
    transmute(r)
7102}
7103
7104/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7105/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7106/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7107/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7108/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7109/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7110/// The sign is determined by sc which can take the following values:\
7111/// _MM_MANT_SIGN_src // sign = sign(src)\
7112/// _MM_MANT_SIGN_zero // sign = 0\
7113/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7114///
7115/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_pd&expand=2872)
7116#[inline]
7117#[target_feature(enable = "avx512f")]
7118#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7119#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7120#[rustc_legacy_const_generics(3, 4)]
7121pub unsafe fn _mm512_mask_getmant_pd<
7122 const NORM: _MM_MANTISSA_NORM_ENUM,
7123 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7124>(
7125 src: __m512d,
7126 k: __mmask8,
7127 a: __m512d,
7128) -> __m512d {
7129 static_assert_uimm_bits!(NORM, 4);
7130 static_assert_uimm_bits!(SIGN, 2);
7131 let a: f64x8 = a.as_f64x8();
7132 let src: f64x8 = src.as_f64x8();
    let r: f64x8 = vgetmantpd(a, SIGN << 2 | NORM, src, k, _MM_FROUND_CUR_DIRECTION);
    transmute(r)
7135}
7136
7137/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7138/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7139/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7140/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7141/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7142/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7143/// The sign is determined by sc which can take the following values:\
7144/// _MM_MANT_SIGN_src // sign = sign(src)\
7145/// _MM_MANT_SIGN_zero // sign = 0\
7146/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7147///
7148/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_pd&expand=2873)
7149#[inline]
7150#[target_feature(enable = "avx512f")]
7151#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7152#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7153#[rustc_legacy_const_generics(2, 3)]
7154pub unsafe fn _mm512_maskz_getmant_pd<
7155 const NORM: _MM_MANTISSA_NORM_ENUM,
7156 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7157>(
7158 k: __mmask8,
7159 a: __m512d,
7160) -> __m512d {
7161 static_assert_uimm_bits!(NORM, 4);
7162 static_assert_uimm_bits!(SIGN, 2);
7163 let a: f64x8 = a.as_f64x8();
7164 let zero: f64x8 = _mm512_setzero_pd().as_f64x8();
    let r: f64x8 = vgetmantpd(a, SIGN << 2 | NORM, zero, k, _MM_FROUND_CUR_DIRECTION);
    transmute(r)
7167}
7168
7169/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7170/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7171/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7172/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7173/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7174/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7175/// The sign is determined by sc which can take the following values:\
7176/// _MM_MANT_SIGN_src // sign = sign(src)\
7177/// _MM_MANT_SIGN_zero // sign = 0\
7178/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7179///
7180/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getmant_pd&expand=2868)
7181#[inline]
7182#[target_feature(enable = "avx512f,avx512vl")]
7183#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7184#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7185#[rustc_legacy_const_generics(1, 2)]
7186pub unsafe fn _mm256_getmant_pd<
7187 const NORM: _MM_MANTISSA_NORM_ENUM,
7188 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7189>(
7190 a: __m256d,
7191) -> __m256d {
7192 static_assert_uimm_bits!(NORM, 4);
7193 static_assert_uimm_bits!(SIGN, 2);
7194 let a: f64x4 = a.as_f64x4();
7195 let zero: f64x4 = _mm256_setzero_pd().as_f64x4();
    let r: f64x4 = vgetmantpd256(a, SIGN << 2 | NORM, zero, 0b00001111);
    transmute(r)
7198}
7199
7200/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7201/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7202/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7203/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7204/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7205/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7206/// The sign is determined by sc which can take the following values:\
7207/// _MM_MANT_SIGN_src // sign = sign(src)\
7208/// _MM_MANT_SIGN_zero // sign = 0\
7209/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7210///
7211/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getmant_pd&expand=2869)
7212#[inline]
7213#[target_feature(enable = "avx512f,avx512vl")]
7214#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7215#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7216#[rustc_legacy_const_generics(3, 4)]
7217pub unsafe fn _mm256_mask_getmant_pd<
7218 const NORM: _MM_MANTISSA_NORM_ENUM,
7219 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7220>(
7221 src: __m256d,
7222 k: __mmask8,
7223 a: __m256d,
7224) -> __m256d {
7225 static_assert_uimm_bits!(NORM, 4);
7226 static_assert_uimm_bits!(SIGN, 2);
7227 let a: f64x4 = a.as_f64x4();
7228 let src: f64x4 = src.as_f64x4();
    let r: f64x4 = vgetmantpd256(a, SIGN << 2 | NORM, src, k);
    transmute(r)
7231}
7232
7233/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7234/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7235/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7236/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7237/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7238/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7239/// The sign is determined by sc which can take the following values:\
7240/// _MM_MANT_SIGN_src // sign = sign(src)\
7241/// _MM_MANT_SIGN_zero // sign = 0\
7242/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7243///
7244/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getmant_pd&expand=2870)
7245#[inline]
7246#[target_feature(enable = "avx512f,avx512vl")]
7247#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7248#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7249#[rustc_legacy_const_generics(2, 3)]
7250pub unsafe fn _mm256_maskz_getmant_pd<
7251 const NORM: _MM_MANTISSA_NORM_ENUM,
7252 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7253>(
7254 k: __mmask8,
7255 a: __m256d,
7256) -> __m256d {
7257 static_assert_uimm_bits!(NORM, 4);
7258 static_assert_uimm_bits!(SIGN, 2);
7259 let a: f64x4 = a.as_f64x4();
7260 let zero: f64x4 = _mm256_setzero_pd().as_f64x4();
    let r: f64x4 = vgetmantpd256(a, SIGN << 2 | NORM, zero, k);
    transmute(r)
7263}
7264
7265/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7266/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7267/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7268/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7269/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7270/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7271/// The sign is determined by sc which can take the following values:\
7272/// _MM_MANT_SIGN_src // sign = sign(src)\
7273/// _MM_MANT_SIGN_zero // sign = 0\
7274/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7275///
7276/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getmant_pd&expand=2865)
7277#[inline]
7278#[target_feature(enable = "avx512f,avx512vl")]
7279#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7280#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7281#[rustc_legacy_const_generics(1, 2)]
7282pub unsafe fn _mm_getmant_pd<
7283 const NORM: _MM_MANTISSA_NORM_ENUM,
7284 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7285>(
7286 a: __m128d,
7287) -> __m128d {
7288 static_assert_uimm_bits!(NORM, 4);
7289 static_assert_uimm_bits!(SIGN, 2);
7290 let a: f64x2 = a.as_f64x2();
7291 let zero: f64x2 = _mm_setzero_pd().as_f64x2();
    let r: f64x2 = vgetmantpd128(a, SIGN << 2 | NORM, zero, 0b00000011);
    transmute(r)
7294}
7295
7296/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7297/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7298/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7299/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7300/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7301/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7302/// The sign is determined by sc which can take the following values:\
7303/// _MM_MANT_SIGN_src // sign = sign(src)\
7304/// _MM_MANT_SIGN_zero // sign = 0\
7305/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7306///
7307/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getmant_pd&expand=2866)
7308#[inline]
7309#[target_feature(enable = "avx512f,avx512vl")]
7310#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7311#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7312#[rustc_legacy_const_generics(3, 4)]
7313pub unsafe fn _mm_mask_getmant_pd<
7314 const NORM: _MM_MANTISSA_NORM_ENUM,
7315 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7316>(
7317 src: __m128d,
7318 k: __mmask8,
7319 a: __m128d,
7320) -> __m128d {
7321 static_assert_uimm_bits!(NORM, 4);
7322 static_assert_uimm_bits!(SIGN, 2);
7323 let a: f64x2 = a.as_f64x2();
7324 let src: f64x2 = src.as_f64x2();
    let r: f64x2 = vgetmantpd128(a, SIGN << 2 | NORM, src, k);
    transmute(r)
7327}
7328
7329/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7330/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7331/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7332/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7333/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7334/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7335/// The sign is determined by sc which can take the following values:\
7336/// _MM_MANT_SIGN_src // sign = sign(src)\
7337/// _MM_MANT_SIGN_zero // sign = 0\
7338/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7339///
7340/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getmant_pd&expand=2867)
7341#[inline]
7342#[target_feature(enable = "avx512f,avx512vl")]
7343#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7344#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7345#[rustc_legacy_const_generics(2, 3)]
7346pub unsafe fn _mm_maskz_getmant_pd<
7347 const NORM: _MM_MANTISSA_NORM_ENUM,
7348 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7349>(
7350 k: __mmask8,
7351 a: __m128d,
7352) -> __m128d {
7353 static_assert_uimm_bits!(NORM, 4);
7354 static_assert_uimm_bits!(SIGN, 2);
7355 let a: f64x2 = a.as_f64x2();
7356 let zero: f64x2 = _mm_setzero_pd().as_f64x2();
    let r: f64x2 = vgetmantpd128(a, SIGN << 2 | NORM, zero, k);
    transmute(r)
7359}
7360
7361/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.\
7362///
7363/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7364/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
7365/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
7366/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
7367/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
7368/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
7369///
7370/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_round_ps&expand=145)
7371#[inline]
7372#[target_feature(enable = "avx512f")]
7373#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7374#[cfg_attr(test, assert_instr(vaddps, ROUNDING = 8))]
7375#[rustc_legacy_const_generics(2)]
7376pub unsafe fn _mm512_add_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
7377 static_assert_rounding!(ROUNDING);
7378 let a: f32x16 = a.as_f32x16();
7379 let b: f32x16 = b.as_f32x16();
7380 let r: f32x16 = vaddps(a, b, ROUNDING);
    transmute(r)
7382}
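
// A usage sketch (hypothetical wrapper, for illustration only): the rounding mode is a
// const generic, so callers combine one _MM_FROUND_TO_* direction with
// _MM_FROUND_NO_EXC, or pass _MM_FROUND_CUR_DIRECTION to round according to MXCSR.RC.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn add_round_to_nearest(a: __m512, b: __m512) -> __m512 {
    // Round to nearest and suppress floating-point exceptions.
    _mm512_add_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b)
}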
7383
7384/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
7385///
7386/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7387/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
7388/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
7389/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
7390/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
7391/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
7392///
7393/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_round_ps&expand=146)
7394#[inline]
7395#[target_feature(enable = "avx512f")]
7396#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7397#[cfg_attr(test, assert_instr(vaddps, ROUNDING = 8))]
7398#[rustc_legacy_const_generics(4)]
7399pub unsafe fn _mm512_mask_add_round_ps<const ROUNDING: i32>(
7400 src: __m512,
7401 k: __mmask16,
7402 a: __m512,
7403 b: __m512,
7404) -> __m512 {
7405 static_assert_rounding!(ROUNDING);
7406 let a: f32x16 = a.as_f32x16();
7407 let b: f32x16 = b.as_f32x16();
7408 let r: f32x16 = vaddps(a, b, ROUNDING);
    transmute(simd_select_bitmask(k, r, src.as_f32x16()))
7410}
7411
7412/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
7413///
7414/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7415/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
7416/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
7417/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
7418/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
7419/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
7420///
7421/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_round_ps&expand=147)
7422#[inline]
7423#[target_feature(enable = "avx512f")]
7424#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7425#[cfg_attr(test, assert_instr(vaddps, ROUNDING = 8))]
7426#[rustc_legacy_const_generics(3)]
7427pub unsafe fn _mm512_maskz_add_round_ps<const ROUNDING: i32>(
7428 k: __mmask16,
7429 a: __m512,
7430 b: __m512,
7431) -> __m512 {
7432 static_assert_rounding!(ROUNDING);
7433 let a: f32x16 = a.as_f32x16();
7434 let b: f32x16 = b.as_f32x16();
7435 let r: f32x16 = vaddps(a, b, ROUNDING);
7436 let zero: f32x16 = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, r, zero))
7438}
7439
7440/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.\
7441///
7442/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7443/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
7444/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
7445/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
7446/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
7447/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
7448///
7449/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_round_pd&expand=142)
7450#[inline]
7451#[target_feature(enable = "avx512f")]
7452#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7453#[cfg_attr(test, assert_instr(vaddpd, ROUNDING = 8))]
7454#[rustc_legacy_const_generics(2)]
7455pub unsafe fn _mm512_add_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
7456 static_assert_rounding!(ROUNDING);
7457 let a: f64x8 = a.as_f64x8();
7458 let b: f64x8 = b.as_f64x8();
7459 let r: f64x8 = vaddpd(a, b, ROUNDING);
    transmute(r)
7461}
7462
7463/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
7464///
7465/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7466/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
7467/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
7468/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
7469/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
7470/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
7471///
7472/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_round_pd&expand=143)
7473#[inline]
7474#[target_feature(enable = "avx512f")]
7475#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7476#[cfg_attr(test, assert_instr(vaddpd, ROUNDING = 8))]
7477#[rustc_legacy_const_generics(4)]
7478pub unsafe fn _mm512_mask_add_round_pd<const ROUNDING: i32>(
7479 src: __m512d,
7480 k: __mmask8,
7481 a: __m512d,
7482 b: __m512d,
7483) -> __m512d {
7484 static_assert_rounding!(ROUNDING);
7485 let a: f64x8 = a.as_f64x8();
7486 let b: f64x8 = b.as_f64x8();
7487 let r: f64x8 = vaddpd(a, b, ROUNDING);
    transmute(simd_select_bitmask(k, r, src.as_f64x8()))
7489}
7490
7491/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
7492///
7493/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7494/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
7495/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
7496/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
7497/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
7498/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
7499///
7500/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_round_pd&expand=144)
7501#[inline]
7502#[target_feature(enable = "avx512f")]
7503#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7504#[cfg_attr(test, assert_instr(vaddpd, ROUNDING = 8))]
7505#[rustc_legacy_const_generics(3)]
7506pub unsafe fn _mm512_maskz_add_round_pd<const ROUNDING: i32>(
7507 k: __mmask8,
7508 a: __m512d,
7509 b: __m512d,
7510) -> __m512d {
7511 static_assert_rounding!(ROUNDING);
7512 let a: f64x8 = a.as_f64x8();
7513 let b: f64x8 = b.as_f64x8();
7514 let r: f64x8 = vaddpd(a, b, ROUNDING);
7515 let zero: f64x8 = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, r, zero))
7517}
7518
7519/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst.\
7520///
7521/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7522/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
7523/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
7524/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
7525/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
7526/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
7527///
7528/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_round_ps&expand=5739)
7529#[inline]
7530#[target_feature(enable = "avx512f")]
7531#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7532#[cfg_attr(test, assert_instr(vsubps, ROUNDING = 8))]
7533#[rustc_legacy_const_generics(2)]
7534pub unsafe fn _mm512_sub_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
7535 static_assert_rounding!(ROUNDING);
7536 let a: f32x16 = a.as_f32x16();
7537 let b: f32x16 = b.as_f32x16();
7538 let r: f32x16 = vsubps(a, b, ROUNDING);
    transmute(r)
7540}
7541
7542/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
7543///
7544/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7545/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
7546/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
7547/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
7548/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
7549/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
7550///
7551/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_round_ps&expand=5737)
7552#[inline]
7553#[target_feature(enable = "avx512f")]
7554#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7555#[cfg_attr(test, assert_instr(vsubps, ROUNDING = 8))]
7556#[rustc_legacy_const_generics(4)]
7557pub unsafe fn _mm512_mask_sub_round_ps<const ROUNDING: i32>(
7558 src: __m512,
7559 k: __mmask16,
7560 a: __m512,
7561 b: __m512,
7562) -> __m512 {
7563 static_assert_rounding!(ROUNDING);
7564 let a: f32x16 = a.as_f32x16();
7565 let b: f32x16 = b.as_f32x16();
7566 let r: f32x16 = vsubps(a, b, ROUNDING);
    transmute(simd_select_bitmask(k, r, src.as_f32x16()))
7568}
7569
7570/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
7571///
7572/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7573/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
7574/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
7575/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
7576/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
7577/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
7578///
7579/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_round_ps&expand=5738)
7580#[inline]
7581#[target_feature(enable = "avx512f")]
7582#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7583#[cfg_attr(test, assert_instr(vsubps, ROUNDING = 8))]
7584#[rustc_legacy_const_generics(3)]
7585pub unsafe fn _mm512_maskz_sub_round_ps<const ROUNDING: i32>(
7586 k: __mmask16,
7587 a: __m512,
7588 b: __m512,
7589) -> __m512 {
7590 static_assert_rounding!(ROUNDING);
7591 let a: f32x16 = a.as_f32x16();
7592 let b: f32x16 = b.as_f32x16();
7593 let r: f32x16 = vsubps(a, b, ROUNDING);
7594 let zero: f32x16 = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, r, zero))
7596}
7597
7598/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst.\
7599///
7600/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7601/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
7602/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
7603/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
7604/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
7605/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
7606///
7607/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_round_pd&expand=5736)
7608#[inline]
7609#[target_feature(enable = "avx512f")]
7610#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7611#[cfg_attr(test, assert_instr(vsubpd, ROUNDING = 8))]
7612#[rustc_legacy_const_generics(2)]
7613pub unsafe fn _mm512_sub_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
7614 static_assert_rounding!(ROUNDING);
7615 let a: f64x8 = a.as_f64x8();
7616 let b: f64x8 = b.as_f64x8();
7617 let r: f64x8 = vsubpd(a, b, ROUNDING);
    transmute(r)
7619}
7620
7621/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
7622///
7623/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7624/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
7625/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
7626/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
7627/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
7628/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
7629///
7630/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_round_pd&expand=5734)
7631#[inline]
7632#[target_feature(enable = "avx512f")]
7633#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7634#[cfg_attr(test, assert_instr(vsubpd, ROUNDING = 8))]
7635#[rustc_legacy_const_generics(4)]
7636pub unsafe fn _mm512_mask_sub_round_pd<const ROUNDING: i32>(
7637 src: __m512d,
7638 k: __mmask8,
7639 a: __m512d,
7640 b: __m512d,
7641) -> __m512d {
7642 static_assert_rounding!(ROUNDING);
7643 let a: f64x8 = a.as_f64x8();
7644 let b: f64x8 = b.as_f64x8();
7645 let r: f64x8 = vsubpd(a, b, ROUNDING);
    transmute(simd_select_bitmask(k, r, src.as_f64x8()))
7647}
7648
7649/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
7650///
7651/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7652/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
7653/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
7654/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
7655/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
7656/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
7657///
7658/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_round_pd&expand=5735)
7659#[inline]
7660#[target_feature(enable = "avx512f")]
7661#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7662#[cfg_attr(test, assert_instr(vsubpd, ROUNDING = 8))]
7663#[rustc_legacy_const_generics(3)]
7664pub unsafe fn _mm512_maskz_sub_round_pd<const ROUNDING: i32>(
7665 k: __mmask8,
7666 a: __m512d,
7667 b: __m512d,
7668) -> __m512d {
7669 static_assert_rounding!(ROUNDING);
7670 let a: f64x8 = a.as_f64x8();
7671 let b: f64x8 = b.as_f64x8();
7672 let r: f64x8 = vsubpd(a, b, ROUNDING);
7673 let zero: f64x8 = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, r, zero))
7675}
7676
7677/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.\
7678///
7679/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7680/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
7681/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
7682/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
7683/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
7684/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
7685///
7686/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_round_ps&expand=3940)
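///
/// A minimal usage sketch (not part of the original documentation), assuming an
/// AVX-512F capable CPU; the rounding mode only matters when the product is not
/// exactly representable:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_ps(1.5);
///     let b = _mm512_set1_ps(3.0);
///     // All 16 lanes hold 4.5.
///     let r = _mm512_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
/// }
/// ```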
7687#[inline]
7688#[target_feature(enable = "avx512f")]
7689#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7690#[cfg_attr(test, assert_instr(vmulps, ROUNDING = 8))]
7691#[rustc_legacy_const_generics(2)]
7692pub unsafe fn _mm512_mul_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
7693 static_assert_rounding!(ROUNDING);
7694 let a: f32x16 = a.as_f32x16();
7695 let b: f32x16 = b.as_f32x16();
7696 let r: f32x16 = vmulps(a, b, ROUNDING);
    transmute(r)
7698}
7699
7700/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
7701///
7702/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7703/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
7704/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
7705/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
7706/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
7707/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
7708///
7709/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_round_ps&expand=3938)
7710#[inline]
7711#[target_feature(enable = "avx512f")]
7712#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7713#[cfg_attr(test, assert_instr(vmulps, ROUNDING = 8))]
7714#[rustc_legacy_const_generics(4)]
7715pub unsafe fn _mm512_mask_mul_round_ps<const ROUNDING: i32>(
7716 src: __m512,
7717 k: __mmask16,
7718 a: __m512,
7719 b: __m512,
7720) -> __m512 {
7721 static_assert_rounding!(ROUNDING);
7722 let a: f32x16 = a.as_f32x16();
7723 let b: f32x16 = b.as_f32x16();
7724 let r: f32x16 = vmulps(a, b, ROUNDING);
    transmute(simd_select_bitmask(k, r, src.as_f32x16()))
7726}
7727
7728/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
7729///
7730/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7731/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
7732/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
7733/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
7734/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
7735/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
7736///
7737/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_round_ps&expand=3939)
7738#[inline]
7739#[target_feature(enable = "avx512f")]
7740#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7741#[cfg_attr(test, assert_instr(vmulps, ROUNDING = 8))]
7742#[rustc_legacy_const_generics(3)]
7743pub unsafe fn _mm512_maskz_mul_round_ps<const ROUNDING: i32>(
7744 k: __mmask16,
7745 a: __m512,
7746 b: __m512,
7747) -> __m512 {
7748 static_assert_rounding!(ROUNDING);
7749 let a: f32x16 = a.as_f32x16();
7750 let b: f32x16 = b.as_f32x16();
7751 let r: f32x16 = vmulps(a, b, ROUNDING);
7752 let zero: f32x16 = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, r, zero))
7754}
7755
7756/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.\
7757///
7758/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7759/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
7760/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
7761/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
7762/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
7763/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
7764///
7765/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_round_pd&expand=3937)
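///
/// A minimal usage sketch (not part of the original documentation), assuming an
/// AVX-512F capable CPU and using truncation as the per-instruction rounding mode:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_pd(2.5);
///     let b = _mm512_set1_pd(4.0);
///     // All 8 lanes hold 10.0; _MM_FROUND_TO_ZERO only matters for inexact products.
///     let r = _mm512_mul_round_pd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
/// }
/// ```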
7766#[inline]
7767#[target_feature(enable = "avx512f")]
7768#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7769#[cfg_attr(test, assert_instr(vmulpd, ROUNDING = 8))]
7770#[rustc_legacy_const_generics(2)]
7771pub unsafe fn _mm512_mul_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
7772 static_assert_rounding!(ROUNDING);
7773 let a: f64x8 = a.as_f64x8();
7774 let b: f64x8 = b.as_f64x8();
7775 let r: f64x8 = vmulpd(a, b, ROUNDING);
    transmute(r)
7777}
7778
7779/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
7780///
7781/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7782/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
7783/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
7784/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
7785/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
7786/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
7787///
7788/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_round_pd&expand=3935)
7789#[inline]
7790#[target_feature(enable = "avx512f")]
7791#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7792#[cfg_attr(test, assert_instr(vmulpd, ROUNDING = 8))]
7793#[rustc_legacy_const_generics(4)]
7794pub unsafe fn _mm512_mask_mul_round_pd<const ROUNDING: i32>(
7795 src: __m512d,
7796 k: __mmask8,
7797 a: __m512d,
7798 b: __m512d,
7799) -> __m512d {
7800 static_assert_rounding!(ROUNDING);
7801 let a: f64x8 = a.as_f64x8();
7802 let b: f64x8 = b.as_f64x8();
7803 let r: f64x8 = vmulpd(a, b, ROUNDING);
    transmute(simd_select_bitmask(k, r, src.as_f64x8()))
7805}
7806
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
7808///
7809/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7810/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
7811/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
7812/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
7813/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
7814/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
7815///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_round_pd&expand=3939)
7817#[inline]
7818#[target_feature(enable = "avx512f")]
7819#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7820#[cfg_attr(test, assert_instr(vmulpd, ROUNDING = 8))]
7821#[rustc_legacy_const_generics(3)]
7822pub unsafe fn _mm512_maskz_mul_round_pd<const ROUNDING: i32>(
7823 k: __mmask8,
7824 a: __m512d,
7825 b: __m512d,
7826) -> __m512d {
7827 static_assert_rounding!(ROUNDING);
7828 let a: f64x8 = a.as_f64x8();
7829 let b: f64x8 = b.as_f64x8();
7830 let r: f64x8 = vmulpd(a, b, ROUNDING);
7831 let zero: f64x8 = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, r, zero))
7833}
7834
7835/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst.\
7836///
7837/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7838/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
7839/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
7840/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
7841/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
7842/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
7843///
7844/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_round_ps&expand=2168)
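///
/// A minimal usage sketch (not part of the original documentation), assuming an
/// AVX-512F capable CPU; 1.0 / 3.0 is inexact, so the chosen rounding mode decides
/// the last bit of every lane:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_ps(1.0);
///     let b = _mm512_set1_ps(3.0);
///     // Rounded up: every lane is the smallest f32 greater than 1.0 / 3.0.
///     let r = _mm512_div_round_ps::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a, b);
/// }
/// ```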
7845#[inline]
7846#[target_feature(enable = "avx512f")]
7847#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7848#[cfg_attr(test, assert_instr(vdivps, ROUNDING = 8))]
7849#[rustc_legacy_const_generics(2)]
7850pub unsafe fn _mm512_div_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
7851 static_assert_rounding!(ROUNDING);
7852 let a: f32x16 = a.as_f32x16();
7853 let b: f32x16 = b.as_f32x16();
7854 let r: f32x16 = vdivps(a, b, ROUNDING);
    transmute(r)
7856}
7857
7858/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
7859///
7860/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7861/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
7862/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
7863/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
7864/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
7865/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
7866///
7867/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_round_ps&expand=2169)
7868#[inline]
7869#[target_feature(enable = "avx512f")]
7870#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7871#[cfg_attr(test, assert_instr(vdivps, ROUNDING = 8))]
7872#[rustc_legacy_const_generics(4)]
7873pub unsafe fn _mm512_mask_div_round_ps<const ROUNDING: i32>(
7874 src: __m512,
7875 k: __mmask16,
7876 a: __m512,
7877 b: __m512,
7878) -> __m512 {
7879 static_assert_rounding!(ROUNDING);
7880 let a: f32x16 = a.as_f32x16();
7881 let b: f32x16 = b.as_f32x16();
7882 let r: f32x16 = vdivps(a, b, ROUNDING);
    transmute(simd_select_bitmask(k, r, src.as_f32x16()))
7884}
7885
7886/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
7887///
7888/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7889/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
7890/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
7891/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
7892/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
7893/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
7894///
7895/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_round_ps&expand=2170)
7896#[inline]
7897#[target_feature(enable = "avx512f")]
7898#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7899#[cfg_attr(test, assert_instr(vdivps, ROUNDING = 8))]
7900#[rustc_legacy_const_generics(3)]
7901pub unsafe fn _mm512_maskz_div_round_ps<const ROUNDING: i32>(
7902 k: __mmask16,
7903 a: __m512,
7904 b: __m512,
7905) -> __m512 {
7906 static_assert_rounding!(ROUNDING);
7907 let a: f32x16 = a.as_f32x16();
7908 let b: f32x16 = b.as_f32x16();
7909 let r: f32x16 = vdivps(a, b, ROUNDING);
7910 let zero: f32x16 = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, r, zero))
7912}
7913
/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst.\
7915///
7916/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7917/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
7918/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
7919/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
7920/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
7921/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
7922///
7923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_round_pd&expand=2165)
7924#[inline]
7925#[target_feature(enable = "avx512f")]
7926#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7927#[cfg_attr(test, assert_instr(vdivpd, ROUNDING = 8))]
7928#[rustc_legacy_const_generics(2)]
7929pub unsafe fn _mm512_div_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
7930 static_assert_rounding!(ROUNDING);
7931 let a: f64x8 = a.as_f64x8();
7932 let b: f64x8 = b.as_f64x8();
7933 let r: f64x8 = vdivpd(a, b, ROUNDING);
    transmute(r)
7935}
7936
7937/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
7938///
7939/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7940/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
7941/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
7942/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
7943/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
7944/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
7945///
7946/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_round_pd&expand=2166)
7947#[inline]
7948#[target_feature(enable = "avx512f")]
7949#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7950#[cfg_attr(test, assert_instr(vdivpd, ROUNDING = 8))]
7951#[rustc_legacy_const_generics(4)]
7952pub unsafe fn _mm512_mask_div_round_pd<const ROUNDING: i32>(
7953 src: __m512d,
7954 k: __mmask8,
7955 a: __m512d,
7956 b: __m512d,
7957) -> __m512d {
7958 static_assert_rounding!(ROUNDING);
7959 let a: f64x8 = a.as_f64x8();
7960 let b: f64x8 = b.as_f64x8();
7961 let r: f64x8 = vdivpd(a, b, ROUNDING);
    transmute(simd_select_bitmask(k, r, src.as_f64x8()))
7963}
7964
7965/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
7966///
7967/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7968/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
7969/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
7970/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
7971/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
7972/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
7973///
7974/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_round_pd&expand=2167)
7975#[inline]
7976#[target_feature(enable = "avx512f")]
7977#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7978#[cfg_attr(test, assert_instr(vdivpd, ROUNDING = 8))]
7979#[rustc_legacy_const_generics(3)]
7980pub unsafe fn _mm512_maskz_div_round_pd<const ROUNDING: i32>(
7981 k: __mmask8,
7982 a: __m512d,
7983 b: __m512d,
7984) -> __m512d {
7985 static_assert_rounding!(ROUNDING);
7986 let a: f64x8 = a.as_f64x8();
7987 let b: f64x8 = b.as_f64x8();
7988 let r: f64x8 = vdivpd(a, b, ROUNDING);
7989 let zero: f64x8 = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, r, zero))
7991}
7992
7993/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst.\
7994///
7995/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7996/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
7997/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
7998/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
7999/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
8000/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
8001///
8002/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_round_ps&expand=5377)
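///
/// A minimal usage sketch (not part of the original documentation), assuming an
/// AVX-512F capable CPU:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_ps(9.0);
///     // All 16 lanes hold 3.0.
///     let r = _mm512_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
/// }
/// ```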
8003#[inline]
8004#[target_feature(enable = "avx512f")]
8005#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8006#[cfg_attr(test, assert_instr(vsqrtps, ROUNDING = 8))]
8007#[rustc_legacy_const_generics(1)]
8008pub unsafe fn _mm512_sqrt_round_ps<const ROUNDING: i32>(a: __m512) -> __m512 {
8009 static_assert_rounding!(ROUNDING);
8010 let a: f32x16 = a.as_f32x16();
8011 let r: f32x16 = vsqrtps(a, ROUNDING);
    transmute(r)
8013}
8014
8015/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8016///
8017/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8018/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
8019/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
8020/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
8021/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
8022/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
8023///
8024/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_round_ps&expand=5375)
8025#[inline]
8026#[target_feature(enable = "avx512f")]
8027#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8028#[cfg_attr(test, assert_instr(vsqrtps, ROUNDING = 8))]
8029#[rustc_legacy_const_generics(3)]
8030pub unsafe fn _mm512_mask_sqrt_round_ps<const ROUNDING: i32>(
8031 src: __m512,
8032 k: __mmask16,
8033 a: __m512,
8034) -> __m512 {
8035 static_assert_rounding!(ROUNDING);
8036 let a: f32x16 = a.as_f32x16();
8037 let r: f32x16 = vsqrtps(a, ROUNDING);
    transmute(simd_select_bitmask(k, r, src.as_f32x16()))
8039}
8040
8041/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8042///
8043/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8044/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
8045/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
8046/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
8047/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
8048/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
8049///
8050/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_round_ps&expand=5376)
8051#[inline]
8052#[target_feature(enable = "avx512f")]
8053#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8054#[cfg_attr(test, assert_instr(vsqrtps, ROUNDING = 8))]
8055#[rustc_legacy_const_generics(2)]
8056pub unsafe fn _mm512_maskz_sqrt_round_ps<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m512 {
8057 static_assert_rounding!(ROUNDING);
8058 let a: f32x16 = a.as_f32x16();
8059 let r: f32x16 = vsqrtps(a, ROUNDING);
8060 let zero: f32x16 = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, r, zero))
8062}
8063
8064/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst.\
8065///
8066/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8067/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
8068/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
8069/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
8070/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
8071/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
8072///
8073/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_round_pd&expand=5374)
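///
/// A minimal usage sketch (not part of the original documentation), assuming an
/// AVX-512F capable CPU; `_MM_FROUND_CUR_DIRECTION` defers to the rounding mode
/// currently set in MXCSR:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_pd(2.0);
///     // All 8 lanes hold sqrt(2.0), rounded according to MXCSR.RC.
///     let r = _mm512_sqrt_round_pd::<{ _MM_FROUND_CUR_DIRECTION }>(a);
/// }
/// ```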
8074#[inline]
8075#[target_feature(enable = "avx512f")]
8076#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8077#[cfg_attr(test, assert_instr(vsqrtpd, ROUNDING = 8))]
8078#[rustc_legacy_const_generics(1)]
8079pub unsafe fn _mm512_sqrt_round_pd<const ROUNDING: i32>(a: __m512d) -> __m512d {
8080 static_assert_rounding!(ROUNDING);
8081 let a: f64x8 = a.as_f64x8();
8082 let r: f64x8 = vsqrtpd(a, ROUNDING);
    transmute(r)
8084}
8085
8086/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8087///
8088/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8089/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
8090/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
8091/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
8092/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
8093/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
8094///
8095/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_round_pd&expand=5372)
8096#[inline]
8097#[target_feature(enable = "avx512f")]
8098#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8099#[cfg_attr(test, assert_instr(vsqrtpd, ROUNDING = 8))]
8100#[rustc_legacy_const_generics(3)]
8101pub unsafe fn _mm512_mask_sqrt_round_pd<const ROUNDING: i32>(
8102 src: __m512d,
8103 k: __mmask8,
8104 a: __m512d,
8105) -> __m512d {
8106 static_assert_rounding!(ROUNDING);
8107 let a: f64x8 = a.as_f64x8();
8108 let r: f64x8 = vsqrtpd(a, ROUNDING);
    transmute(simd_select_bitmask(k, r, src.as_f64x8()))
8110}
8111
8112/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8113///
8114/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8115/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
8116/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
8117/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
8118/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
8119/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
8120///
8121/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_round_pd&expand=5373)
8122#[inline]
8123#[target_feature(enable = "avx512f")]
8124#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8125#[cfg_attr(test, assert_instr(vsqrtpd, ROUNDING = 8))]
8126#[rustc_legacy_const_generics(2)]
8127pub unsafe fn _mm512_maskz_sqrt_round_pd<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m512d {
8128 static_assert_rounding!(ROUNDING);
8129 let a: f64x8 = a.as_f64x8();
8130 let r: f64x8 = vsqrtpd(a, ROUNDING);
8131 let zero: f64x8 = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, r, zero))
8133}
8134
8135/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.\
8136///
8137/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8138/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
8139/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
8140/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
8141/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
8142/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
8143///
8144/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_round_ps&expand=2565)
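///
/// A minimal usage sketch (not part of the original documentation), assuming an
/// AVX-512F capable CPU; the multiply and add are fused, so there is only a single
/// rounding at the end:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_ps(2.0);
///     let b = _mm512_set1_ps(3.0);
///     let c = _mm512_set1_ps(1.0);
///     // All 16 lanes hold 2.0 * 3.0 + 1.0 = 7.0.
///     let r = _mm512_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
///         a, b, c,
///     );
/// }
/// ```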
8145#[inline]
8146#[target_feature(enable = "avx512f")]
8147#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8148#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
8149#[rustc_legacy_const_generics(3)]
8150pub unsafe fn _mm512_fmadd_round_ps<const ROUNDING: i32>(
8151 a: __m512,
8152 b: __m512,
8153 c: __m512,
8154) -> __m512 {
8155 static_assert_rounding!(ROUNDING);
8156 let a: f32x16 = a.as_f32x16();
8157 let b: f32x16 = b.as_f32x16();
8158 let c: f32x16 = c.as_f32x16();
8159 let r: f32x16 = vfmadd132psround(a, b, c, ROUNDING);
    transmute(r)
8161}
8162
8163/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8164///
8165/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8166/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
8167/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
8168/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
8169/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
8170/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
8171///
8172/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_round_ps&expand=2566)
8173#[inline]
8174#[target_feature(enable = "avx512f")]
8175#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8176#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
8177#[rustc_legacy_const_generics(4)]
8178pub unsafe fn _mm512_mask_fmadd_round_ps<const ROUNDING: i32>(
8179 a: __m512,
8180 k: __mmask16,
8181 b: __m512,
8182 c: __m512,
8183) -> __m512 {
8184 static_assert_rounding!(ROUNDING);
8185 let a: f32x16 = a.as_f32x16();
8186 let b: f32x16 = b.as_f32x16();
8187 let c: f32x16 = c.as_f32x16();
8188 let r: f32x16 = vfmadd132psround(a, b, c, ROUNDING);
    transmute(simd_select_bitmask(k, r, a))
8190}
8191
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8193///
8194/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8195/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
8196/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
8197/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
8198/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
8199/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
8200///
8201/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_round_ps&expand=2568)
8202#[inline]
8203#[target_feature(enable = "avx512f")]
8204#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8205#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
8206#[rustc_legacy_const_generics(4)]
8207pub unsafe fn _mm512_maskz_fmadd_round_ps<const ROUNDING: i32>(
8208 k: __mmask16,
8209 a: __m512,
8210 b: __m512,
8211 c: __m512,
8212) -> __m512 {
8213 static_assert_rounding!(ROUNDING);
8214 let a: f32x16 = a.as_f32x16();
8215 let b: f32x16 = b.as_f32x16();
8216 let c: f32x16 = c.as_f32x16();
8217 let r: f32x16 = vfmadd132psround(a, b, c, ROUNDING);
8218 let zero: f32x16 = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, r, zero))
8220}
8221
8222/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
8223///
8224/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8225/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
8226/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
8227/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
8228/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
8229/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
8230///
8231/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_round_ps&expand=2567)
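///
/// A minimal usage sketch (not part of the original documentation), assuming an
/// AVX-512F capable CPU; unlike the `mask`/`maskz` variants, unselected lanes are
/// taken from `c`:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_ps(2.0);
///     let b = _mm512_set1_ps(3.0);
///     let c = _mm512_set1_ps(1.0);
///     // The low 8 lanes hold 7.0; the high 8 lanes keep 1.0 from `c`.
///     let r = _mm512_mask3_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
///         a, b, c, 0b0000_0000_1111_1111,
///     );
/// }
/// ```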
8232#[inline]
8233#[target_feature(enable = "avx512f")]
8234#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8235#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
8236#[rustc_legacy_const_generics(4)]
8237pub unsafe fn _mm512_mask3_fmadd_round_ps<const ROUNDING: i32>(
8238 a: __m512,
8239 b: __m512,
8240 c: __m512,
8241 k: __mmask16,
8242) -> __m512 {
8243 static_assert_rounding!(ROUNDING);
8244 let a: f32x16 = a.as_f32x16();
8245 let b: f32x16 = b.as_f32x16();
8246 let c: f32x16 = c.as_f32x16();
8247 let r: f32x16 = vfmadd132psround(a, b, c, ROUNDING);
    transmute(simd_select_bitmask(k, r, c))
8249}
8250
8251/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.\
8252///
8253/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8254/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
8255/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
8256/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
8257/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
8258/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
8259///
8260/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_round_pd&expand=2561)
8261#[inline]
8262#[target_feature(enable = "avx512f")]
8263#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8264#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
8265#[rustc_legacy_const_generics(3)]
8266pub unsafe fn _mm512_fmadd_round_pd<const ROUNDING: i32>(
8267 a: __m512d,
8268 b: __m512d,
8269 c: __m512d,
8270) -> __m512d {
8271 static_assert_rounding!(ROUNDING);
8272 let a: f64x8 = a.as_f64x8();
8273 let b: f64x8 = b.as_f64x8();
8274 let c: f64x8 = c.as_f64x8();
8275 let r: f64x8 = vfmadd132pdround(a, b, c, ROUNDING);
    transmute(r)
8277}
8278
8279/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8280///
8281/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8282/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
8283/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
8284/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
8285/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
8286/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
8287///
8288/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_round_pd&expand=2562)
8289#[inline]
8290#[target_feature(enable = "avx512f")]
8291#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8292#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
8293#[rustc_legacy_const_generics(4)]
8294pub unsafe fn _mm512_mask_fmadd_round_pd<const ROUNDING: i32>(
8295 a: __m512d,
8296 k: __mmask8,
8297 b: __m512d,
8298 c: __m512d,
8299) -> __m512d {
8300 static_assert_rounding!(ROUNDING);
8301 let a: f64x8 = a.as_f64x8();
8302 let b: f64x8 = b.as_f64x8();
8303 let c: f64x8 = c.as_f64x8();
8304 let r: f64x8 = vfmadd132pdround(a, b, c, ROUNDING);
    transmute(simd_select_bitmask(k, r, a))
8306}
8307
8308/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8309///
8310/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8311/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
8312/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
8313/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
8314/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
8315/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
8316///
8317/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_round_pd&expand=2564)
8318#[inline]
8319#[target_feature(enable = "avx512f")]
8320#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8321#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
8322#[rustc_legacy_const_generics(4)]
8323pub unsafe fn _mm512_maskz_fmadd_round_pd<const ROUNDING: i32>(
8324 k: __mmask8,
8325 a: __m512d,
8326 b: __m512d,
8327 c: __m512d,
8328) -> __m512d {
8329 static_assert_rounding!(ROUNDING);
8330 let a: f64x8 = a.as_f64x8();
8331 let b: f64x8 = b.as_f64x8();
8332 let c: f64x8 = c.as_f64x8();
8333 let r: f64x8 = vfmadd132pdround(a, b, c, ROUNDING);
8334 let zero: f64x8 = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, r, zero))
8336}
8337
8338/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
8339///
8340/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8341/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
8342/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
8343/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
8344/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
8345/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
8346///
8347/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_round_pd&expand=2563)
8348#[inline]
8349#[target_feature(enable = "avx512f")]
8350#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8351#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
8352#[rustc_legacy_const_generics(4)]
8353pub unsafe fn _mm512_mask3_fmadd_round_pd<const ROUNDING: i32>(
8354 a: __m512d,
8355 b: __m512d,
8356 c: __m512d,
8357 k: __mmask8,
8358) -> __m512d {
8359 static_assert_rounding!(ROUNDING);
8360 let a: f64x8 = a.as_f64x8();
8361 let b: f64x8 = b.as_f64x8();
8362 let c: f64x8 = c.as_f64x8();
8363 let r: f64x8 = vfmadd132pdround(a, b, c, ROUNDING);
    transmute(simd_select_bitmask(k, r, c))
8365}
8366
8367/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.\
8368///
8369/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8370/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
8371/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
8372/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
8373/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
8374/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
8375///
8376/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_round_ps&expand=2651)
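///
/// A minimal usage sketch (not part of the original documentation), assuming an
/// AVX-512F capable CPU; fmsub computes `a * b - c` with a single final rounding:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_ps(2.0);
///     let b = _mm512_set1_ps(3.0);
///     let c = _mm512_set1_ps(1.0);
///     // All 16 lanes hold 2.0 * 3.0 - 1.0 = 5.0.
///     let r = _mm512_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
///         a, b, c,
///     );
/// }
/// ```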
8377#[inline]
8378#[target_feature(enable = "avx512f")]
8379#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8380#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
8381#[rustc_legacy_const_generics(3)]
8382pub unsafe fn _mm512_fmsub_round_ps<const ROUNDING: i32>(
8383 a: __m512,
8384 b: __m512,
8385 c: __m512,
8386) -> __m512 {
8387 static_assert_rounding!(ROUNDING);
8388 let zero: f32x16 = mem::zeroed();
    let sub: f32x16 = simd_sub(zero, c.as_f32x16());
    let a: f32x16 = a.as_f32x16();
    let b: f32x16 = b.as_f32x16();
    let r: f32x16 = vfmadd132psround(a, b, sub, ROUNDING);
    transmute(r)
8394}
8395
8396/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8397///
8398/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8399/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
8400/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
8401/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
8402/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
8403/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
8404///
8405/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_round_ps&expand=2652)
8406#[inline]
8407#[target_feature(enable = "avx512f")]
8408#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8409#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
8410#[rustc_legacy_const_generics(4)]
8411pub unsafe fn _mm512_mask_fmsub_round_ps<const ROUNDING: i32>(
8412 a: __m512,
8413 k: __mmask16,
8414 b: __m512,
8415 c: __m512,
8416) -> __m512 {
8417 static_assert_rounding!(ROUNDING);
8418 let zero: f32x16 = mem::zeroed();
    let sub: f32x16 = simd_sub(zero, c.as_f32x16());
    let a: f32x16 = a.as_f32x16();
    let b: f32x16 = b.as_f32x16();
    let r: f32x16 = vfmadd132psround(a, b, sub, ROUNDING);
    transmute(simd_select_bitmask(k, r, a))
8424}
8425
8426/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8427///
8428/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8429/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
8430/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
8431/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
8432/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
8433/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
8434///
8435/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_round_ps&expand=2654)
8436#[inline]
8437#[target_feature(enable = "avx512f")]
8438#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8439#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
8440#[rustc_legacy_const_generics(4)]
8441pub unsafe fn _mm512_maskz_fmsub_round_ps<const ROUNDING: i32>(
8442 k: __mmask16,
8443 a: __m512,
8444 b: __m512,
8445 c: __m512,
8446) -> __m512 {
8447 static_assert_rounding!(ROUNDING);
8448 let zero: f32x16 = mem::zeroed();
    let sub: f32x16 = simd_sub(zero, c.as_f32x16());
    let a: f32x16 = a.as_f32x16();
    let b: f32x16 = b.as_f32x16();
    let r: f32x16 = vfmadd132psround(a, b, sub, ROUNDING);
    transmute(simd_select_bitmask(k, r, zero))
8454}
8455
8456/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
8457///
8458/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8459/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
8460/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
8461/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
8462/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
8463/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
8464///
8465/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_round_ps&expand=2653)
8466#[inline]
8467#[target_feature(enable = "avx512f")]
8468#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8469#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
8470#[rustc_legacy_const_generics(4)]
8471pub unsafe fn _mm512_mask3_fmsub_round_ps<const ROUNDING: i32>(
8472 a: __m512,
8473 b: __m512,
8474 c: __m512,
8475 k: __mmask16,
8476) -> __m512 {
8477 static_assert_rounding!(ROUNDING);
8478 let zero: f32x16 = mem::zeroed();
8479 let c: f32x16 = c.as_f32x16();
    let sub: f32x16 = simd_sub(zero, c);
    let a: f32x16 = a.as_f32x16();
    let b: f32x16 = b.as_f32x16();
    let r: f32x16 = vfmadd132psround(a, b, sub, ROUNDING);
    transmute(simd_select_bitmask(k, r, c))
8485}
8486
8487/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.\
8488///
8489/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8490/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
8491/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
8492/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
8493/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
8494/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
8495///
8496/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_round_pd&expand=2647)
8497#[inline]
8498#[target_feature(enable = "avx512f")]
8499#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8500#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
8501#[rustc_legacy_const_generics(3)]
8502pub unsafe fn _mm512_fmsub_round_pd<const ROUNDING: i32>(
8503 a: __m512d,
8504 b: __m512d,
8505 c: __m512d,
8506) -> __m512d {
8507 static_assert_rounding!(ROUNDING);
8508 let zero: f64x8 = mem::zeroed();
    let sub: f64x8 = simd_sub(zero, c.as_f64x8());
    let a: f64x8 = a.as_f64x8();
    let b: f64x8 = b.as_f64x8();
    let r: f64x8 = vfmadd132pdround(a, b, sub, ROUNDING);
    transmute(r)
8514}
8515
8516/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8517///
8518/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8519/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
8520/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
8521/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
8522/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
8523/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
8524///
8525/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_round_pd&expand=2648)
8526#[inline]
8527#[target_feature(enable = "avx512f")]
8528#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8529#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
8530#[rustc_legacy_const_generics(4)]
8531pub unsafe fn _mm512_mask_fmsub_round_pd<const ROUNDING: i32>(
8532 a: __m512d,
8533 k: __mmask8,
8534 b: __m512d,
8535 c: __m512d,
8536) -> __m512d {
8537 static_assert_rounding!(ROUNDING);
8538 let zero: f64x8 = mem::zeroed();
8539 let sub: f64x8 = simd_sub(lhs:zero, rhs:c.as_f64x8());
8540 let a: f64x8 = a.as_f64x8();
8541 let b: f64x8 = b.as_f64x8();
8542 let r: f64x8 = vfmadd132pdround(a, b, c:sub, ROUNDING);
8543 transmute(src:simd_select_bitmask(m:k, yes:r, no:a))
8544}
8545
8546/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8547///
8548/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8549/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
8550/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
8551/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
8552/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
8553/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
8554///
8555/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_round_pd&expand=2650)
8556#[inline]
8557#[target_feature(enable = "avx512f")]
8558#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8559#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
8560#[rustc_legacy_const_generics(4)]
8561pub unsafe fn _mm512_maskz_fmsub_round_pd<const ROUNDING: i32>(
8562 k: __mmask8,
8563 a: __m512d,
8564 b: __m512d,
8565 c: __m512d,
8566) -> __m512d {
8567 static_assert_rounding!(ROUNDING);
8568 let zero: f64x8 = mem::zeroed();
8569 let sub: f64x8 = simd_sub(lhs:zero, rhs:c.as_f64x8());
8570 let a: f64x8 = a.as_f64x8();
8571 let b: f64x8 = b.as_f64x8();
8572 let r: f64x8 = vfmadd132pdround(a, b, c:sub, ROUNDING);
8573 transmute(src:simd_select_bitmask(m:k, yes:r, no:zero))
8574}
8575
8576/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
8577///
8578/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8579/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
8580/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
8581/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
8582/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
8583/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
8584///
8585/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_round_pd&expand=2649)
8586#[inline]
8587#[target_feature(enable = "avx512f")]
8588#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8589#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
8590#[rustc_legacy_const_generics(4)]
8591pub unsafe fn _mm512_mask3_fmsub_round_pd<const ROUNDING: i32>(
8592 a: __m512d,
8593 b: __m512d,
8594 c: __m512d,
8595 k: __mmask8,
8596) -> __m512d {
8597 static_assert_rounding!(ROUNDING);
8598 let zero: f64x8 = mem::zeroed();
8599 let c: f64x8 = c.as_f64x8();
8600 let sub: f64x8 = simd_sub(lhs:zero, rhs:c);
8601 let a: f64x8 = a.as_f64x8();
8602 let b: f64x8 = b.as_f64x8();
8603 let r: f64x8 = vfmadd132pdround(a, b, c:sub, ROUNDING);
8604 transmute(src:simd_select_bitmask(m:k, yes:r, no:c))
8605}
8606
8607/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.\
8608///
8609/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8610/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
8611/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
8612/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
8613/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
8614/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
8615///
8616/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_round_ps&expand=2619)
8617#[inline]
8618#[target_feature(enable = "avx512f")]
8619#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8620#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
8621#[rustc_legacy_const_generics(3)]
8622pub unsafe fn _mm512_fmaddsub_round_ps<const ROUNDING: i32>(
8623 a: __m512,
8624 b: __m512,
8625 c: __m512,
8626) -> __m512 {
8627 static_assert_rounding!(ROUNDING);
8628 let a: f32x16 = a.as_f32x16();
8629 let b: f32x16 = b.as_f32x16();
8630 let c: f32x16 = c.as_f32x16();
8631 let r: f32x16 = vfmaddsub213ps(a, b, c, ROUNDING);
8632 transmute(src:r)
8633}
8634
8635/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8636///
8637/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8638/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
8639/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
8640/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
8641/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
8642/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
8643///
8644/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_round_ps&expand=2620)
8645#[inline]
8646#[target_feature(enable = "avx512f")]
8647#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8648#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
8649#[rustc_legacy_const_generics(4)]
8650pub unsafe fn _mm512_mask_fmaddsub_round_ps<const ROUNDING: i32>(
8651 a: __m512,
8652 k: __mmask16,
8653 b: __m512,
8654 c: __m512,
8655) -> __m512 {
8656 static_assert_rounding!(ROUNDING);
8657 let a: f32x16 = a.as_f32x16();
8658 let b: f32x16 = b.as_f32x16();
8659 let c: f32x16 = c.as_f32x16();
8660 let r: f32x16 = vfmaddsub213ps(a, b, c, ROUNDING);
8661 transmute(src:simd_select_bitmask(m:k, yes:r, no:a))
8662}
8663
8664/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8665///
8666/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8667/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
8668/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
8669/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
8670/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
8671/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
8672///
8673/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_round_ps&expand=2622)
8674#[inline]
8675#[target_feature(enable = "avx512f")]
8676#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8677#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
8678#[rustc_legacy_const_generics(4)]
8679pub unsafe fn _mm512_maskz_fmaddsub_round_ps<const ROUNDING: i32>(
8680 k: __mmask16,
8681 a: __m512,
8682 b: __m512,
8683 c: __m512,
8684) -> __m512 {
8685 static_assert_rounding!(ROUNDING);
8686 let a: f32x16 = a.as_f32x16();
8687 let b: f32x16 = b.as_f32x16();
8688 let c: f32x16 = c.as_f32x16();
8689 let r: f32x16 = vfmaddsub213ps(a, b, c, ROUNDING);
8690 let zero: f32x16 = _mm512_setzero_ps().as_f32x16();
8691 transmute(src:simd_select_bitmask(m:k, yes:r, no:zero))
8692}
8693
8694/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
8695///
8696/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8697/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
8698/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
8699/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
8700/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
8701/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
8702///
8703/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_round_ps&expand=2621)
8704#[inline]
8705#[target_feature(enable = "avx512f")]
8706#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8707#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
8708#[rustc_legacy_const_generics(4)]
8709pub unsafe fn _mm512_mask3_fmaddsub_round_ps<const ROUNDING: i32>(
8710 a: __m512,
8711 b: __m512,
8712 c: __m512,
8713 k: __mmask16,
8714) -> __m512 {
8715 static_assert_rounding!(ROUNDING);
8716 let a: f32x16 = a.as_f32x16();
8717 let b: f32x16 = b.as_f32x16();
8718 let c: f32x16 = c.as_f32x16();
8719 let r: f32x16 = vfmaddsub213ps(a, b, c, ROUNDING);
8720 transmute(src:simd_select_bitmask(m:k, yes:r, no:c))
8721}
8722
8723/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.\
8724///
8725/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8726/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
8727/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
8728/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
8729/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
8730/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
8731///
8732/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_round_pd&expand=2615)
8733#[inline]
8734#[target_feature(enable = "avx512f")]
8735#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8736#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
8737#[rustc_legacy_const_generics(3)]
8738pub unsafe fn _mm512_fmaddsub_round_pd<const ROUNDING: i32>(
8739 a: __m512d,
8740 b: __m512d,
8741 c: __m512d,
8742) -> __m512d {
8743 static_assert_rounding!(ROUNDING);
8744 let a: f64x8 = a.as_f64x8();
8745 let b: f64x8 = b.as_f64x8();
8746 let c: f64x8 = c.as_f64x8();
8747 let r: f64x8 = vfmaddsub213pd(a, b, c, ROUNDING);
8748 transmute(src:r)
8749}
8750
8751/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8752///
8753/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8754/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
8755/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
8756/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
8757/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
8758/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
8759///
8760/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_round_pd&expand=2616)
8761#[inline]
8762#[target_feature(enable = "avx512f")]
8763#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8764#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
8765#[rustc_legacy_const_generics(4)]
8766pub unsafe fn _mm512_mask_fmaddsub_round_pd<const ROUNDING: i32>(
8767 a: __m512d,
8768 k: __mmask8,
8769 b: __m512d,
8770 c: __m512d,
8771) -> __m512d {
8772 static_assert_rounding!(ROUNDING);
8773 let a: f64x8 = a.as_f64x8();
8774 let b: f64x8 = b.as_f64x8();
8775 let c: f64x8 = c.as_f64x8();
8776 let r: f64x8 = vfmaddsub213pd(a, b, c, ROUNDING);
8777 transmute(src:simd_select_bitmask(m:k, yes:r, no:a))
8778}
8779
8780/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8781///
8782/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8783/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
8784/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
8785/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
8786/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
8787/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
8788///
8789/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_round_pd&expand=2618)
8790#[inline]
8791#[target_feature(enable = "avx512f")]
8792#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8793#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
8794#[rustc_legacy_const_generics(4)]
8795pub unsafe fn _mm512_maskz_fmaddsub_round_pd<const ROUNDING: i32>(
8796 k: __mmask8,
8797 a: __m512d,
8798 b: __m512d,
8799 c: __m512d,
8800) -> __m512d {
8801 static_assert_rounding!(ROUNDING);
8802 let a: f64x8 = a.as_f64x8();
8803 let b: f64x8 = b.as_f64x8();
8804 let c: f64x8 = c.as_f64x8();
8805 let r: f64x8 = vfmaddsub213pd(a, b, c, ROUNDING);
8806 let zero: f64x8 = _mm512_setzero_pd().as_f64x8();
8807 transmute(src:simd_select_bitmask(m:k, yes:r, no:zero))
8808}
8809
8810/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
8811///
8812/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8813/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
8814/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
8815/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
8816/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
8817/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
8818///
8819/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_round_pd&expand=2617)
8820#[inline]
8821#[target_feature(enable = "avx512f")]
8822#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8823#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
8824#[rustc_legacy_const_generics(4)]
8825pub unsafe fn _mm512_mask3_fmaddsub_round_pd<const ROUNDING: i32>(
8826 a: __m512d,
8827 b: __m512d,
8828 c: __m512d,
8829 k: __mmask8,
8830) -> __m512d {
8831 static_assert_rounding!(ROUNDING);
8832 let a: f64x8 = a.as_f64x8();
8833 let b: f64x8 = b.as_f64x8();
8834 let c: f64x8 = c.as_f64x8();
8835 let r: f64x8 = vfmaddsub213pd(a, b, c, ROUNDING);
8836 transmute(src:simd_select_bitmask(m:k, yes:r, no:c))
8837}
8838
8839/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.\
8840///
8841/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8842/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
8843/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
8844/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
8845/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
8846/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
8847///
8848/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_round_ps&expand=2699)
8849#[inline]
8850#[target_feature(enable = "avx512f")]
8851#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8852#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
8853#[rustc_legacy_const_generics(3)]
8854pub unsafe fn _mm512_fmsubadd_round_ps<const ROUNDING: i32>(
8855 a: __m512,
8856 b: __m512,
8857 c: __m512,
8858) -> __m512 {
8859 static_assert_rounding!(ROUNDING);
8860 let zero: f32x16 = mem::zeroed();
8861 let sub: f32x16 = simd_sub(lhs:zero, rhs:c.as_f32x16());
8862 let a: f32x16 = a.as_f32x16();
8863 let b: f32x16 = b.as_f32x16();
8864 let r: f32x16 = vfmaddsub213ps(a, b, c:sub, ROUNDING);
8865 transmute(src:r)
8866}
8867
8868/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8869///
8870/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8871/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
8872/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
8873/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
8874/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
8875/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
8876///
8877/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_round_ps&expand=2700)
8878#[inline]
8879#[target_feature(enable = "avx512f")]
8880#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8881#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
8882#[rustc_legacy_const_generics(4)]
8883pub unsafe fn _mm512_mask_fmsubadd_round_ps<const ROUNDING: i32>(
8884 a: __m512,
8885 k: __mmask16,
8886 b: __m512,
8887 c: __m512,
8888) -> __m512 {
8889 static_assert_rounding!(ROUNDING);
8890 let zero: f32x16 = mem::zeroed();
8891 let sub: f32x16 = simd_sub(lhs:zero, rhs:c.as_f32x16());
8892 let a: f32x16 = a.as_f32x16();
8893 let b: f32x16 = b.as_f32x16();
8894 let r: f32x16 = vfmaddsub213ps(a, b, c:sub, ROUNDING);
8895 transmute(src:simd_select_bitmask(m:k, yes:r, no:a))
8896}
8897
8898/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8899///
8900/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8901/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
8902/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
8903/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
8904/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
8905/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
8906///
8907/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_round_ps&expand=2702)
8908#[inline]
8909#[target_feature(enable = "avx512f")]
8910#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8911#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
8912#[rustc_legacy_const_generics(4)]
8913pub unsafe fn _mm512_maskz_fmsubadd_round_ps<const ROUNDING: i32>(
8914 k: __mmask16,
8915 a: __m512,
8916 b: __m512,
8917 c: __m512,
8918) -> __m512 {
8919 static_assert_rounding!(ROUNDING);
8920 let zero: f32x16 = mem::zeroed();
8921 let sub: f32x16 = simd_sub(lhs:zero, rhs:c.as_f32x16());
8922 let a: f32x16 = a.as_f32x16();
8923 let b: f32x16 = b.as_f32x16();
8924 let r: f32x16 = vfmaddsub213ps(a, b, c:sub, ROUNDING);
8925 transmute(src:simd_select_bitmask(m:k, yes:r, no:zero))
8926}
8927
8928/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
8929///
8930/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8931/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
8932/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
8933/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
8934/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
8935/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
8936///
8937/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_round_ps&expand=2701)
8938#[inline]
8939#[target_feature(enable = "avx512f")]
8940#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8941#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
8942#[rustc_legacy_const_generics(4)]
8943pub unsafe fn _mm512_mask3_fmsubadd_round_ps<const ROUNDING: i32>(
8944 a: __m512,
8945 b: __m512,
8946 c: __m512,
8947 k: __mmask16,
8948) -> __m512 {
8949 static_assert_rounding!(ROUNDING);
8950 let zero: f32x16 = mem::zeroed();
8951 let c: f32x16 = c.as_f32x16();
8952 let sub: f32x16 = simd_sub(lhs:zero, rhs:c);
8953 let a: f32x16 = a.as_f32x16();
8954 let b: f32x16 = b.as_f32x16();
8955 let r: f32x16 = vfmaddsub213ps(a, b, c:sub, ROUNDING);
8956 transmute(src:simd_select_bitmask(m:k, yes:r, no:c))
8957}
8958
8959/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.\
8960///
8961/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8962/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
8963/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
8964/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
8965/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
8966/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
8967///
8968/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_round_pd&expand=2695)
8969#[inline]
8970#[target_feature(enable = "avx512f")]
8971#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8972#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
8973#[rustc_legacy_const_generics(3)]
8974pub unsafe fn _mm512_fmsubadd_round_pd<const ROUNDING: i32>(
8975 a: __m512d,
8976 b: __m512d,
8977 c: __m512d,
8978) -> __m512d {
8979 static_assert_rounding!(ROUNDING);
8980 let zero: f64x8 = mem::zeroed();
8981 let sub: f64x8 = simd_sub(lhs:zero, rhs:c.as_f64x8());
8982 let a: f64x8 = a.as_f64x8();
8983 let b: f64x8 = b.as_f64x8();
8984 let r: f64x8 = vfmaddsub213pd(a, b, c:sub, ROUNDING);
8985 transmute(src:r)
8986}
8987
8988/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8989///
8990/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8991/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
8992/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
8993/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
8994/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
8995/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
8996///
8997/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_round_pd&expand=2696)
8998#[inline]
8999#[target_feature(enable = "avx512f")]
9000#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9001#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
9002#[rustc_legacy_const_generics(4)]
9003pub unsafe fn _mm512_mask_fmsubadd_round_pd<const ROUNDING: i32>(
9004 a: __m512d,
9005 k: __mmask8,
9006 b: __m512d,
9007 c: __m512d,
9008) -> __m512d {
9009 static_assert_rounding!(ROUNDING);
9010 let zero: f64x8 = mem::zeroed();
9011 let sub: f64x8 = simd_sub(lhs:zero, rhs:c.as_f64x8());
9012 let a: f64x8 = a.as_f64x8();
9013 let b: f64x8 = b.as_f64x8();
9014 let r: f64x8 = vfmaddsub213pd(a, b, c:sub, ROUNDING);
9015 transmute(src:simd_select_bitmask(m:k, yes:r, no:a))
9016}
9017
9018/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9019///
9020/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9021/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
9022/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
9023/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
9024/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
9025/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
9026///
9027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_round_pd&expand=2698)
9028#[inline]
9029#[target_feature(enable = "avx512f")]
9030#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9031#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
9032#[rustc_legacy_const_generics(4)]
9033pub unsafe fn _mm512_maskz_fmsubadd_round_pd<const ROUNDING: i32>(
9034 k: __mmask8,
9035 a: __m512d,
9036 b: __m512d,
9037 c: __m512d,
9038) -> __m512d {
9039 static_assert_rounding!(ROUNDING);
9040 let zero: f64x8 = mem::zeroed();
9041 let sub: f64x8 = simd_sub(lhs:zero, rhs:c.as_f64x8());
9042 let a: f64x8 = a.as_f64x8();
9043 let b: f64x8 = b.as_f64x8();
9044 let r: f64x8 = vfmaddsub213pd(a, b, c:sub, ROUNDING);
9045 transmute(src:simd_select_bitmask(m:k, yes:r, no:zero))
9046}
9047
9048/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9049///
9050/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9051/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
9052/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
9053/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
9054/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
9055/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
9056///
9057/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_round_pd&expand=2697)
9058#[inline]
9059#[target_feature(enable = "avx512f")]
9060#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9061#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
9062#[rustc_legacy_const_generics(4)]
9063pub unsafe fn _mm512_mask3_fmsubadd_round_pd<const ROUNDING: i32>(
9064 a: __m512d,
9065 b: __m512d,
9066 c: __m512d,
9067 k: __mmask8,
9068) -> __m512d {
9069 static_assert_rounding!(ROUNDING);
9070 let zero: f64x8 = mem::zeroed();
9071 let c: f64x8 = c.as_f64x8();
9072 let sub: f64x8 = simd_sub(lhs:zero, rhs:c);
9073 let a: f64x8 = a.as_f64x8();
9074 let b: f64x8 = b.as_f64x8();
9075 let r: f64x8 = vfmaddsub213pd(a, b, c:sub, ROUNDING);
9076 transmute(src:simd_select_bitmask(m:k, yes:r, no:c))
9077}
9078
9079/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.\
9080///
9081/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9082/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
9083/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
9084/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
9085/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
9086/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
9087///
9088/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_round_ps&expand=2731)
9089#[inline]
9090#[target_feature(enable = "avx512f")]
9091#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9092#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
9093#[rustc_legacy_const_generics(3)]
9094pub unsafe fn _mm512_fnmadd_round_ps<const ROUNDING: i32>(
9095 a: __m512,
9096 b: __m512,
9097 c: __m512,
9098) -> __m512 {
9099 static_assert_rounding!(ROUNDING);
9100 let zero: f32x16 = mem::zeroed();
9101 let sub: f32x16 = simd_sub(lhs:zero, rhs:a.as_f32x16());
9102 let b: f32x16 = b.as_f32x16();
9103 let c: f32x16 = c.as_f32x16();
9104 let r: f32x16 = vfmadd132psround(a:sub, b, c, ROUNDING);
9105 transmute(src:r)
9106}
9107
9108/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9109///
9110/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9111/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
9112/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
9113/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
9114/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
9115/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
9116///
9117/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_round_ps&expand=2732)
9118#[inline]
9119#[target_feature(enable = "avx512f")]
9120#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9121#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
9122#[rustc_legacy_const_generics(4)]
9123pub unsafe fn _mm512_mask_fnmadd_round_ps<const ROUNDING: i32>(
9124 a: __m512,
9125 k: __mmask16,
9126 b: __m512,
9127 c: __m512,
9128) -> __m512 {
9129 static_assert_rounding!(ROUNDING);
9130 let zero: f32x16 = mem::zeroed();
9131 let sub: f32x16 = simd_sub(lhs:zero, rhs:a.as_f32x16());
9132 let b: f32x16 = b.as_f32x16();
9133 let c: f32x16 = c.as_f32x16();
9134 let r: f32x16 = vfmadd132psround(a:sub, b, c, ROUNDING);
9135 transmute(src:simd_select_bitmask(m:k, yes:r, no:a.as_f32x16()))
9136}
9137
9138/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9139///
9140/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9141/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
9142/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
9143/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
9144/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
9145/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
9146///
9147/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_round_ps&expand=2734)
9148#[inline]
9149#[target_feature(enable = "avx512f")]
9150#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9151#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
9152#[rustc_legacy_const_generics(4)]
9153pub unsafe fn _mm512_maskz_fnmadd_round_ps<const ROUNDING: i32>(
9154 k: __mmask16,
9155 a: __m512,
9156 b: __m512,
9157 c: __m512,
9158) -> __m512 {
9159 static_assert_rounding!(ROUNDING);
9160 let zero: f32x16 = mem::zeroed();
9161 let sub: f32x16 = simd_sub(lhs:zero, rhs:a.as_f32x16());
9162 let b: f32x16 = b.as_f32x16();
9163 let c: f32x16 = c.as_f32x16();
9164 let r: f32x16 = vfmadd132psround(a:sub, b, c, ROUNDING);
9165 transmute(src:simd_select_bitmask(m:k, yes:r, no:zero))
9166}
9167
9168/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9169///
9170/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9171/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
9172/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
9173/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
9174/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
9175/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
9176///
9177/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_round_ps&expand=2733)
9178#[inline]
9179#[target_feature(enable = "avx512f")]
9180#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9181#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
9182#[rustc_legacy_const_generics(4)]
9183pub unsafe fn _mm512_mask3_fnmadd_round_ps<const ROUNDING: i32>(
9184 a: __m512,
9185 b: __m512,
9186 c: __m512,
9187 k: __mmask16,
9188) -> __m512 {
9189 static_assert_rounding!(ROUNDING);
9190 let zero: f32x16 = mem::zeroed();
9191 let sub: f32x16 = simd_sub(lhs:zero, rhs:a.as_f32x16());
9192 let b: f32x16 = b.as_f32x16();
9193 let c: f32x16 = c.as_f32x16();
9194 let r: f32x16 = vfmadd132psround(a:sub, b, c, ROUNDING);
9195 transmute(src:simd_select_bitmask(m:k, yes:r, no:c))
9196}
9197
9198/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.\
9199///
9200/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9201/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
9202/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
9203/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
9204/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
9205/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
9206///
9207/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_pd&expand=2711)
9208#[inline]
9209#[target_feature(enable = "avx512f")]
9210#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9211#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
9212#[rustc_legacy_const_generics(3)]
9213pub unsafe fn _mm512_fnmadd_round_pd<const ROUNDING: i32>(
9214 a: __m512d,
9215 b: __m512d,
9216 c: __m512d,
9217) -> __m512d {
9218 static_assert_rounding!(ROUNDING);
9219 let zero: f64x8 = mem::zeroed();
9220 let sub: f64x8 = simd_sub(lhs:zero, rhs:a.as_f64x8());
9221 let b: f64x8 = b.as_f64x8();
9222 let c: f64x8 = c.as_f64x8();
9223 let r: f64x8 = vfmadd132pdround(a:sub, b, c, ROUNDING);
9224 transmute(src:r)
9225}
9226
9227/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9228///
9229/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9230/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
9231/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
9232/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
9233/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
9234/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
9235///
9236/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_round_pd&expand=2728)
9237#[inline]
9238#[target_feature(enable = "avx512f")]
9239#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9240#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
9241#[rustc_legacy_const_generics(4)]
9242pub unsafe fn _mm512_mask_fnmadd_round_pd<const ROUNDING: i32>(
9243 a: __m512d,
9244 k: __mmask8,
9245 b: __m512d,
9246 c: __m512d,
9247) -> __m512d {
9248 static_assert_rounding!(ROUNDING);
9249 let zero: f64x8 = mem::zeroed();
9250 let a: f64x8 = a.as_f64x8();
9251 let sub: f64x8 = simd_sub(lhs:zero, rhs:a);
9252 let b: f64x8 = b.as_f64x8();
9253 let c: f64x8 = c.as_f64x8();
9254 let r: f64x8 = vfmadd132pdround(a:sub, b, c, ROUNDING);
9255 transmute(src:simd_select_bitmask(m:k, yes:r, no:a))
9256}
9257
9258/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9259///
9260/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9261/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
9262/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
9263/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
9264/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
9265/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
9266///
9267/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_round_pd&expand=2730)
9268#[inline]
9269#[target_feature(enable = "avx512f")]
9270#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9271#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
9272#[rustc_legacy_const_generics(4)]
9273pub unsafe fn _mm512_maskz_fnmadd_round_pd<const ROUNDING: i32>(
9274 k: __mmask8,
9275 a: __m512d,
9276 b: __m512d,
9277 c: __m512d,
9278) -> __m512d {
9279 static_assert_rounding!(ROUNDING);
9280 let zero: f64x8 = mem::zeroed();
9281 let sub: f64x8 = simd_sub(lhs:zero, rhs:a.as_f64x8());
9282 let b: f64x8 = b.as_f64x8();
9283 let c: f64x8 = c.as_f64x8();
9284 let r: f64x8 = vfmadd132pdround(a:sub, b, c, ROUNDING);
9285 transmute(src:simd_select_bitmask(m:k, yes:r, no:zero))
9286}
9287
9288/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9289///
9290/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9291/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
9292/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
9293/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
9294/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
9295/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
9296///
9297/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_round_pd&expand=2729)
9298#[inline]
9299#[target_feature(enable = "avx512f")]
9300#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9301#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
9302#[rustc_legacy_const_generics(4)]
9303pub unsafe fn _mm512_mask3_fnmadd_round_pd<const ROUNDING: i32>(
9304 a: __m512d,
9305 b: __m512d,
9306 c: __m512d,
9307 k: __mmask8,
9308) -> __m512d {
9309 static_assert_rounding!(ROUNDING);
9310 let zero: f64x8 = mem::zeroed();
9311 let sub: f64x8 = simd_sub(lhs:zero, rhs:a.as_f64x8());
9312 let b: f64x8 = b.as_f64x8();
9313 let c: f64x8 = c.as_f64x8();
9314 let r: f64x8 = vfmadd132pdround(a:sub, b, c, ROUNDING);
9315 transmute(src:simd_select_bitmask(m:k, yes:r, no:c))
9316}
9317
9318/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.\
9319///
9320/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9321/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
9322/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
9323/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
9324/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
9325/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
9326///
9327/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_round_ps&expand=2779)
9328#[inline]
9329#[target_feature(enable = "avx512f")]
9330#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9331#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
9332#[rustc_legacy_const_generics(3)]
9333pub unsafe fn _mm512_fnmsub_round_ps<const ROUNDING: i32>(
9334 a: __m512,
9335 b: __m512,
9336 c: __m512,
9337) -> __m512 {
9338 static_assert_rounding!(ROUNDING);
9339 let zero: f32x16 = mem::zeroed();
9340 let suba: f32x16 = simd_sub(lhs:zero, rhs:a.as_f32x16());
9341 let subc: f32x16 = simd_sub(lhs:zero, rhs:c.as_f32x16());
9342 let b: f32x16 = b.as_f32x16();
9343 let r: f32x16 = vfmadd132psround(a:suba, b, c:subc, ROUNDING);
9344 transmute(src:r)
9345}
9346
9347/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9348///
9349/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9350/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
9351/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
9352/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
9353/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
9354/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
9355///
9356/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_round_ps&expand=2780)
9357#[inline]
9358#[target_feature(enable = "avx512f")]
9359#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9360#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
9361#[rustc_legacy_const_generics(4)]
9362pub unsafe fn _mm512_mask_fnmsub_round_ps<const ROUNDING: i32>(
9363 a: __m512,
9364 k: __mmask16,
9365 b: __m512,
9366 c: __m512,
9367) -> __m512 {
9368 static_assert_rounding!(ROUNDING);
9369 let zero: f32x16 = mem::zeroed();
9370 let a: f32x16 = a.as_f32x16();
9371 let suba: f32x16 = simd_sub(lhs:zero, rhs:a);
9372 let subc: f32x16 = simd_sub(lhs:zero, rhs:c.as_f32x16());
9373 let b: f32x16 = b.as_f32x16();
9374 let r: f32x16 = vfmadd132psround(a:suba, b, c:subc, ROUNDING);
9375 transmute(src:simd_select_bitmask(m:k, yes:r, no:a))
9376}
9377
9378/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9379///
9380/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9381/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
9382/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
9383/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
9384/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
9385/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
9386///
9387/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_round_ps&expand=2782)
9388#[inline]
9389#[target_feature(enable = "avx512f")]
9390#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9391#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
9392#[rustc_legacy_const_generics(4)]
9393pub unsafe fn _mm512_maskz_fnmsub_round_ps<const ROUNDING: i32>(
9394 k: __mmask16,
9395 a: __m512,
9396 b: __m512,
9397 c: __m512,
9398) -> __m512 {
9399 static_assert_rounding!(ROUNDING);
9400 let zero: f32x16 = mem::zeroed();
9401 let suba: f32x16 = simd_sub(lhs:zero, rhs:a.as_f32x16());
9402 let subc: f32x16 = simd_sub(lhs:zero, rhs:c.as_f32x16());
9403 let b: f32x16 = b.as_f32x16();
9404 let r: f32x16 = vfmadd132psround(a:suba, b, c:subc, ROUNDING);
9405 transmute(src:simd_select_bitmask(m:k, yes:r, no:zero))
9406}
9407
9408/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9409///
9410/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9411/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
9412/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
9413/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
9414/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
9415/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
9416///
9417/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_round_ps&expand=2781)
9418#[inline]
9419#[target_feature(enable = "avx512f")]
9420#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9421#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
9422#[rustc_legacy_const_generics(4)]
9423pub unsafe fn _mm512_mask3_fnmsub_round_ps<const ROUNDING: i32>(
9424 a: __m512,
9425 b: __m512,
9426 c: __m512,
9427 k: __mmask16,
9428) -> __m512 {
9429 static_assert_rounding!(ROUNDING);
9430 let zero: f32x16 = mem::zeroed();
9431 let suba: f32x16 = simd_sub(lhs:zero, rhs:a.as_f32x16());
9432 let c: f32x16 = c.as_f32x16();
9433 let subc: f32x16 = simd_sub(lhs:zero, rhs:c);
9434 let b: f32x16 = b.as_f32x16();
9435 let r: f32x16 = vfmadd132psround(a:suba, b, c:subc, ROUNDING);
9436 transmute(src:simd_select_bitmask(m:k, yes:r, no:c))
9437}
9438
9439/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.\
9440///
9441/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9442/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
9443/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
9444/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
9445/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
9446/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
9447///
9448/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_round_pd&expand=2775)
9449#[inline]
9450#[target_feature(enable = "avx512f")]
9451#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9452#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
9453#[rustc_legacy_const_generics(3)]
9454pub unsafe fn _mm512_fnmsub_round_pd<const ROUNDING: i32>(
9455 a: __m512d,
9456 b: __m512d,
9457 c: __m512d,
9458) -> __m512d {
9459 static_assert_rounding!(ROUNDING);
9460 let zero: f64x8 = mem::zeroed();
9461 let suba: f64x8 = simd_sub(lhs:zero, rhs:a.as_f64x8());
9462 let subc: f64x8 = simd_sub(lhs:zero, rhs:c.as_f64x8());
9463 let b: f64x8 = b.as_f64x8();
9464 let r: f64x8 = vfmadd132pdround(a:suba, b, c:subc, ROUNDING);
9465 transmute(src:r)
9466}
9467
9468/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9469///
9470/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9471/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
9472/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
9473/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
9474/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
9475/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
9476///
9477/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_round_pd&expand=2776)
9478#[inline]
9479#[target_feature(enable = "avx512f")]
9480#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9481#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
9482#[rustc_legacy_const_generics(4)]
9483pub unsafe fn _mm512_mask_fnmsub_round_pd<const ROUNDING: i32>(
9484 a: __m512d,
9485 k: __mmask8,
9486 b: __m512d,
9487 c: __m512d,
9488) -> __m512d {
9489 static_assert_rounding!(ROUNDING);
9490 let zero: f64x8 = mem::zeroed();
9491 let a: f64x8 = a.as_f64x8();
9492 let suba: f64x8 = simd_sub(lhs:zero, rhs:a);
9493 let subc: f64x8 = simd_sub(lhs:zero, rhs:c.as_f64x8());
9494 let b: f64x8 = b.as_f64x8();
9495 let r: f64x8 = vfmadd132pdround(a:suba, b, c:subc, ROUNDING);
9496 transmute(src:simd_select_bitmask(m:k, yes:r, no:a))
9497}
9498
9499/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9500///
9501/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9502/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
9503/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
9504/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
9505/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
9506/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
9507///
9508/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_round_pd&expand=2778)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_maskz_fnmsub_round_pd<const ROUNDING: i32>(
    k: __mmask8,
    a: __m512d,
    b: __m512d,
    c: __m512d,
) -> __m512d {
    static_assert_rounding!(ROUNDING);
    let zero: f64x8 = mem::zeroed();
    let suba: f64x8 = simd_sub(zero, a.as_f64x8());
    let subc: f64x8 = simd_sub(zero, c.as_f64x8());
    let b: f64x8 = b.as_f64x8();
    let r: f64x8 = vfmadd132pdround(suba, b, subc, ROUNDING);
    transmute(simd_select_bitmask(k, r, zero))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_round_pd&expand=2777)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask3_fnmsub_round_pd<const ROUNDING: i32>(
    a: __m512d,
    b: __m512d,
    c: __m512d,
    k: __mmask8,
) -> __m512d {
    static_assert_rounding!(ROUNDING);
    let zero: f64x8 = mem::zeroed();
    let suba: f64x8 = simd_sub(zero, a.as_f64x8());
    let c: f64x8 = c.as_f64x8();
    let subc: f64x8 = simd_sub(zero, c);
    let b: f64x8 = b.as_f64x8();
    let r: f64x8 = vfmadd132pdround(suba, b, subc, ROUNDING);
    transmute(simd_select_bitmask(k, r, c))
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_round_ps&expand=3662)
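///
/// A minimal usage sketch (hypothetical values, not compiled here; assumes a nightly
/// toolchain with `stdarch_x86_avx512` enabled and an AVX-512F capable CPU).
/// `_MM_FROUND_NO_EXC` only suppresses exception reporting; it does not change which
/// maximum is selected:
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_ps(1.0);
///     let b = _mm512_set1_ps(2.0);
///     // Every lane of `r` is 2.0.
///     let r = _mm512_max_round_ps::<_MM_FROUND_NO_EXC>(a, b);
/// }
/// ```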
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmaxps, SAE = 8))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_max_round_ps<const SAE: i32>(a: __m512, b: __m512) -> __m512 {
    static_assert_sae!(SAE);
    let a: f32x16 = a.as_f32x16();
    let b: f32x16 = b.as_f32x16();
    let r: f32x16 = vmaxps(a, b, SAE);
    transmute(r)
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_round_ps&expand=3660)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmaxps, SAE = 8))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_max_round_ps<const SAE: i32>(
    src: __m512,
    k: __mmask16,
    a: __m512,
    b: __m512,
) -> __m512 {
    static_assert_sae!(SAE);
    let a: f32x16 = a.as_f32x16();
    let b: f32x16 = b.as_f32x16();
    let r: f32x16 = vmaxps(a, b, SAE);
    transmute(simd_select_bitmask(k, r, src.as_f32x16()))
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_round_ps&expand=3661)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmaxps, SAE = 8))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_maskz_max_round_ps<const SAE: i32>(
    k: __mmask16,
    a: __m512,
    b: __m512,
) -> __m512 {
    static_assert_sae!(SAE);
    let a: f32x16 = a.as_f32x16();
    let b: f32x16 = b.as_f32x16();
    let r: f32x16 = vmaxps(a, b, SAE);
    let zero: f32x16 = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, r, zero))
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_round_pd&expand=3659)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmaxpd, SAE = 8))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_max_round_pd<const SAE: i32>(a: __m512d, b: __m512d) -> __m512d {
    static_assert_sae!(SAE);
    let a: f64x8 = a.as_f64x8();
    let b: f64x8 = b.as_f64x8();
    let r: f64x8 = vmaxpd(a, b, SAE);
    transmute(r)
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_round_pd&expand=3657)
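///
/// A hedged sketch of the writemask behaviour (hypothetical values, not compiled here;
/// assumes a nightly toolchain with `stdarch_x86_avx512` and an AVX-512F capable CPU):
///
/// ```ignore
/// unsafe {
///     let src = _mm512_set1_pd(-1.0);
///     let a = _mm512_set1_pd(1.0);
///     let b = _mm512_set1_pd(2.0);
///     // Lanes 0 and 1 take max(1.0, 2.0) = 2.0; the remaining lanes keep -1.0 from `src`.
///     let r = _mm512_mask_max_round_pd::<_MM_FROUND_NO_EXC>(src, 0b00000011, a, b);
/// }
/// ```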
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmaxpd, SAE = 8))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_max_round_pd<const SAE: i32>(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
    b: __m512d,
) -> __m512d {
    static_assert_sae!(SAE);
    let a: f64x8 = a.as_f64x8();
    let b: f64x8 = b.as_f64x8();
    let r: f64x8 = vmaxpd(a, b, SAE);
    transmute(simd_select_bitmask(k, r, src.as_f64x8()))
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_round_pd&expand=3658)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmaxpd, SAE = 8))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_maskz_max_round_pd<const SAE: i32>(
    k: __mmask8,
    a: __m512d,
    b: __m512d,
) -> __m512d {
    static_assert_sae!(SAE);
    let a: f64x8 = a.as_f64x8();
    let b: f64x8 = b.as_f64x8();
    let r: f64x8 = vmaxpd(a, b, SAE);
    let zero: f64x8 = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, r, zero))
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_round_ps&expand=3776)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vminps, SAE = 8))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_min_round_ps<const SAE: i32>(a: __m512, b: __m512) -> __m512 {
    static_assert_sae!(SAE);
    let a: f32x16 = a.as_f32x16();
    let b: f32x16 = b.as_f32x16();
    let r: f32x16 = vminps(a, b, SAE);
    transmute(r)
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_round_ps&expand=3774)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vminps, SAE = 8))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_min_round_ps<const SAE: i32>(
    src: __m512,
    k: __mmask16,
    a: __m512,
    b: __m512,
) -> __m512 {
    static_assert_sae!(SAE);
    let a: f32x16 = a.as_f32x16();
    let b: f32x16 = b.as_f32x16();
    let r: f32x16 = vminps(a, b, SAE);
    transmute(simd_select_bitmask(k, r, src.as_f32x16()))
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_round_ps&expand=3775)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vminps, SAE = 8))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_maskz_min_round_ps<const SAE: i32>(
    k: __mmask16,
    a: __m512,
    b: __m512,
) -> __m512 {
    static_assert_sae!(SAE);
    let a: f32x16 = a.as_f32x16();
    let b: f32x16 = b.as_f32x16();
    let r: f32x16 = vminps(a, b, SAE);
    let zero: f32x16 = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, r, zero))
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_round_pd&expand=3773)
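///
/// A minimal usage sketch (hypothetical values, not compiled here; assumes a nightly
/// toolchain with `stdarch_x86_avx512` and an AVX-512F capable CPU):
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_pd(1.5);
///     let b = _mm512_set1_pd(-0.5);
///     // Every lane of `r` is -0.5.
///     let r = _mm512_min_round_pd::<_MM_FROUND_NO_EXC>(a, b);
/// }
/// ```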
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vminpd, SAE = 8))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_min_round_pd<const SAE: i32>(a: __m512d, b: __m512d) -> __m512d {
    static_assert_sae!(SAE);
    let a: f64x8 = a.as_f64x8();
    let b: f64x8 = b.as_f64x8();
    let r: f64x8 = vminpd(a, b, SAE);
    transmute(r)
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_round_pd&expand=3771)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vminpd, SAE = 8))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_min_round_pd<const SAE: i32>(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
    b: __m512d,
) -> __m512d {
    static_assert_sae!(SAE);
    let a: f64x8 = a.as_f64x8();
    let b: f64x8 = b.as_f64x8();
    let r: f64x8 = vminpd(a, b, SAE);
    transmute(simd_select_bitmask(k, r, src.as_f64x8()))
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_round_pd&expand=3772)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vminpd, SAE = 8))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_maskz_min_round_pd<const SAE: i32>(
    k: __mmask8,
    a: __m512d,
    b: __m512d,
) -> __m512d {
    static_assert_sae!(SAE);
    let a: f64x8 = a.as_f64x8();
    let b: f64x8 = b.as_f64x8();
    let r: f64x8 = vminpd(a, b, SAE);
    let zero: f64x8 = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, r, zero))
}

/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_round_ps&expand=2850)
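///
/// A hedged sketch of the floor(log2(x)) behaviour described above (hypothetical values,
/// not compiled here; assumes a nightly toolchain with `stdarch_x86_avx512` and an
/// AVX-512F capable CPU):
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_ps(10.0);
///     // 10.0 = 1.25 * 2^3, so every lane of `r` is 3.0.
///     let r = _mm512_getexp_round_ps::<_MM_FROUND_NO_EXC>(a);
/// }
/// ```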
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgetexpps, SAE = 8))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn _mm512_getexp_round_ps<const SAE: i32>(a: __m512) -> __m512 {
    static_assert_sae!(SAE);
    let a: f32x16 = a.as_f32x16();
    let zero: f32x16 = _mm512_setzero_ps().as_f32x16();
    let r: f32x16 = vgetexpps(a, zero, 0b11111111_11111111, SAE);
    transmute(r)
}

/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_round_ps&expand=2851)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgetexpps, SAE = 8))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_mask_getexp_round_ps<const SAE: i32>(
    src: __m512,
    k: __mmask16,
    a: __m512,
) -> __m512 {
    static_assert_sae!(SAE);
    let a: f32x16 = a.as_f32x16();
    let src: f32x16 = src.as_f32x16();
    let r: f32x16 = vgetexpps(a, src, k, SAE);
    transmute(r)
}

/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_round_ps&expand=2852)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgetexpps, SAE = 8))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_maskz_getexp_round_ps<const SAE: i32>(k: __mmask16, a: __m512) -> __m512 {
    static_assert_sae!(SAE);
    let a: f32x16 = a.as_f32x16();
    let zero: f32x16 = _mm512_setzero_ps().as_f32x16();
    let r: f32x16 = vgetexpps(a, zero, k, SAE);
    transmute(r)
}

/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_round_pd&expand=2847)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgetexppd, SAE = 8))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn _mm512_getexp_round_pd<const SAE: i32>(a: __m512d) -> __m512d {
    static_assert_sae!(SAE);
    let a: f64x8 = a.as_f64x8();
    let zero: f64x8 = _mm512_setzero_pd().as_f64x8();
    let r: f64x8 = vgetexppd(a, zero, 0b11111111, SAE);
    transmute(r)
}

/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_round_pd&expand=2848)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgetexppd, SAE = 8))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_mask_getexp_round_pd<const SAE: i32>(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
) -> __m512d {
    static_assert_sae!(SAE);
    let a: f64x8 = a.as_f64x8();
    let src: f64x8 = src.as_f64x8();
    let r: f64x8 = vgetexppd(a, src, k, SAE);
    transmute(r)
}

/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_round_pd&expand=2849)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgetexppd, SAE = 8))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_maskz_getexp_round_pd<const SAE: i32>(k: __mmask8, a: __m512d) -> __m512d {
    static_assert_sae!(SAE);
    let a: f64x8 = a.as_f64x8();
    let zero: f64x8 = _mm512_setzero_pd().as_f64x8();
    let r: f64x8 = vgetexppd(a, zero, k, SAE);
    transmute(r)
}

/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// _MM_FROUND_TO_NEAREST_INT // round to nearest\
/// _MM_FROUND_TO_NEG_INF // round down\
/// _MM_FROUND_TO_POS_INF // round up\
/// _MM_FROUND_TO_ZERO // truncate\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE\
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_round_ps&expand=4790)
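///
/// With `IMM8 = 0` (no extra fraction bits, round to nearest) the intrinsic behaves like a
/// per-lane round-to-nearest-integer. A hedged sketch (hypothetical values, not compiled
/// here; assumes a nightly toolchain with `stdarch_x86_avx512` and an AVX-512F capable CPU):
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_ps(1.3);
///     // Every lane of `r` is 1.0.
///     let r = _mm512_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(a);
/// }
/// ```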
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(1, 2)]
pub unsafe fn _mm512_roundscale_round_ps<const IMM8: i32, const SAE: i32>(a: __m512) -> __m512 {
    static_assert_uimm_bits!(IMM8, 8);
    static_assert_mantissas_sae!(SAE);
    let a: f32x16 = a.as_f32x16();
    let zero: f32x16 = _mm512_setzero_ps().as_f32x16();
    let r: f32x16 = vrndscaleps(a, IMM8, zero, 0b11111111_11111111, SAE);
    transmute(r)
}

/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// _MM_FROUND_TO_NEAREST_INT // round to nearest\
/// _MM_FROUND_TO_NEG_INF // round down\
/// _MM_FROUND_TO_POS_INF // round up\
/// _MM_FROUND_TO_ZERO // truncate\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE\
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_round_ps&expand=4788)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(3, 4)]
pub unsafe fn _mm512_mask_roundscale_round_ps<const IMM8: i32, const SAE: i32>(
    src: __m512,
    k: __mmask16,
    a: __m512,
) -> __m512 {
    static_assert_uimm_bits!(IMM8, 8);
    static_assert_mantissas_sae!(SAE);
    let a: f32x16 = a.as_f32x16();
    let src: f32x16 = src.as_f32x16();
    let r: f32x16 = vrndscaleps(a, IMM8, src, k, SAE);
    transmute(r)
}

/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// _MM_FROUND_TO_NEAREST_INT // round to nearest\
/// _MM_FROUND_TO_NEG_INF // round down\
/// _MM_FROUND_TO_POS_INF // round up\
/// _MM_FROUND_TO_ZERO // truncate\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE\
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_round_ps&expand=4789)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(2, 3)]
pub unsafe fn _mm512_maskz_roundscale_round_ps<const IMM8: i32, const SAE: i32>(
    k: __mmask16,
    a: __m512,
) -> __m512 {
    static_assert_uimm_bits!(IMM8, 8);
    static_assert_mantissas_sae!(SAE);
    let a: f32x16 = a.as_f32x16();
    let zero: f32x16 = _mm512_setzero_ps().as_f32x16();
    let r: f32x16 = vrndscaleps(a, IMM8, zero, k, SAE);
    transmute(r)
}

/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// _MM_FROUND_TO_NEAREST_INT // round to nearest\
/// _MM_FROUND_TO_NEG_INF // round down\
/// _MM_FROUND_TO_POS_INF // round up\
/// _MM_FROUND_TO_ZERO // truncate\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE\
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_round_pd&expand=4787)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(1, 2)]
pub unsafe fn _mm512_roundscale_round_pd<const IMM8: i32, const SAE: i32>(a: __m512d) -> __m512d {
    static_assert_uimm_bits!(IMM8, 8);
    static_assert_mantissas_sae!(SAE);
    let a: f64x8 = a.as_f64x8();
    let zero: f64x8 = _mm512_setzero_pd().as_f64x8();
    let r: f64x8 = vrndscalepd(a, IMM8, zero, 0b11111111, SAE);
    transmute(r)
}

/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// _MM_FROUND_TO_NEAREST_INT // round to nearest\
/// _MM_FROUND_TO_NEG_INF // round down\
/// _MM_FROUND_TO_POS_INF // round up\
/// _MM_FROUND_TO_ZERO // truncate\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE\
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_round_pd&expand=4785)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(3, 4)]
pub unsafe fn _mm512_mask_roundscale_round_pd<const IMM8: i32, const SAE: i32>(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
) -> __m512d {
    static_assert_uimm_bits!(IMM8, 8);
    static_assert_mantissas_sae!(SAE);
    let a: f64x8 = a.as_f64x8();
    let src: f64x8 = src.as_f64x8();
    let r: f64x8 = vrndscalepd(a, IMM8, src, k, SAE);
    transmute(r)
}

/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// _MM_FROUND_TO_NEAREST_INT // round to nearest\
/// _MM_FROUND_TO_NEG_INF // round down\
/// _MM_FROUND_TO_POS_INF // round up\
/// _MM_FROUND_TO_ZERO // truncate\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE\
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_round_pd&expand=4786)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(2, 3)]
pub unsafe fn _mm512_maskz_roundscale_round_pd<const IMM8: i32, const SAE: i32>(
    k: __mmask8,
    a: __m512d,
) -> __m512d {
    static_assert_uimm_bits!(IMM8, 8);
    static_assert_mantissas_sae!(SAE);
    let a: f64x8 = a.as_f64x8();
    let zero: f64x8 = _mm512_setzero_pd().as_f64x8();
    let r: f64x8 = vrndscalepd(a, IMM8, zero, k, SAE);
    transmute(r)
}

/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_round_ps&expand=4889)
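///
/// Per Intel's description, `scalef` computes `a * 2^floor(b)` per lane. A hedged sketch
/// (hypothetical values, not compiled here; assumes a nightly toolchain with
/// `stdarch_x86_avx512` and an AVX-512F capable CPU):
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_ps(3.0);
///     let b = _mm512_set1_ps(2.0);
///     // Every lane of `r` is 3.0 * 2^2 = 12.0.
///     let r = _mm512_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
/// }
/// ```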
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vscalefps, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_scalef_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
    static_assert_rounding!(ROUNDING);
    let a: f32x16 = a.as_f32x16();
    let b: f32x16 = b.as_f32x16();
    let zero: f32x16 = _mm512_setzero_ps().as_f32x16();
    let r: f32x16 = vscalefps(a, b, zero, 0b11111111_11111111, ROUNDING);
    transmute(r)
}

/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_round_ps&expand=4887)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vscalefps, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_scalef_round_ps<const ROUNDING: i32>(
    src: __m512,
    k: __mmask16,
    a: __m512,
    b: __m512,
) -> __m512 {
    static_assert_rounding!(ROUNDING);
    let a: f32x16 = a.as_f32x16();
    let b: f32x16 = b.as_f32x16();
    let src: f32x16 = src.as_f32x16();
    let r: f32x16 = vscalefps(a, b, src, k, ROUNDING);
    transmute(r)
}

/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_round_ps&expand=4888)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vscalefps, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_maskz_scalef_round_ps<const ROUNDING: i32>(
    k: __mmask16,
    a: __m512,
    b: __m512,
) -> __m512 {
    static_assert_rounding!(ROUNDING);
    let a: f32x16 = a.as_f32x16();
    let b: f32x16 = b.as_f32x16();
    let zero: f32x16 = _mm512_setzero_ps().as_f32x16();
    let r: f32x16 = vscalefps(a, b, zero, k, ROUNDING);
    transmute(r)
}

/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_round_pd&expand=4886)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vscalefpd, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_scalef_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
    static_assert_rounding!(ROUNDING);
    let a: f64x8 = a.as_f64x8();
    let b: f64x8 = b.as_f64x8();
    let zero: f64x8 = _mm512_setzero_pd().as_f64x8();
    let r: f64x8 = vscalefpd(a, b, zero, 0b11111111, ROUNDING);
    transmute(r)
}

/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_round_pd&expand=4884)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vscalefpd, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_scalef_round_pd<const ROUNDING: i32>(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
    b: __m512d,
) -> __m512d {
    static_assert_rounding!(ROUNDING);
    let a: f64x8 = a.as_f64x8();
    let b: f64x8 = b.as_f64x8();
    let src: f64x8 = src.as_f64x8();
    let r: f64x8 = vscalefpd(a, b, src, k, ROUNDING);
    transmute(r)
}

/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_round_pd&expand=4885)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vscalefpd, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_maskz_scalef_round_pd<const ROUNDING: i32>(
    k: __mmask8,
    a: __m512d,
    b: __m512d,
) -> __m512d {
    static_assert_rounding!(ROUNDING);
    let a: f64x8 = a.as_f64x8();
    let b: f64x8 = b.as_f64x8();
    let zero: f64x8 = _mm512_setzero_pd().as_f64x8();
    let r: f64x8 = vscalefpd(a, b, zero, k, ROUNDING);
    transmute(r)
}

/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.\
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_round_ps&expand=2505)
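///
/// The per-lane fixup behaviour is driven by the token table in `c` and the `IMM8` flag
/// bits, as specified in Intel's documentation for `VFIXUPIMMPS`. A hedged sketch of the
/// call shape only (hypothetical values, not compiled here; assumes a nightly toolchain
/// with `stdarch_x86_avx512` and an AVX-512F capable CPU):
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_ps(1.0);
///     let b = _mm512_set1_ps(0.0);
///     // Per Intel's token table, an all-zero table selects the "preserve destination"
///     // response for every classification, so `r` should equal `a` here.
///     let c = _mm512_setzero_si512();
///     let r = _mm512_fixupimm_round_ps::<0, _MM_FROUND_NO_EXC>(a, b, c);
/// }
/// ```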
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(3, 4)]
pub unsafe fn _mm512_fixupimm_round_ps<const IMM8: i32, const SAE: i32>(
    a: __m512,
    b: __m512,
    c: __m512i,
) -> __m512 {
    static_assert_uimm_bits!(IMM8, 8);
    static_assert_mantissas_sae!(SAE);
    let a: f32x16 = a.as_f32x16();
    let b: f32x16 = b.as_f32x16();
    let c: i32x16 = c.as_i32x16();
    let r: f32x16 = vfixupimmps(a, b, c, IMM8, 0b11111111_11111111, SAE);
    transmute(r)
}

/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.\
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_round_ps&expand=2506)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(4, 5)]
pub unsafe fn _mm512_mask_fixupimm_round_ps<const IMM8: i32, const SAE: i32>(
    a: __m512,
    k: __mmask16,
    b: __m512,
    c: __m512i,
) -> __m512 {
    static_assert_uimm_bits!(IMM8, 8);
    static_assert_mantissas_sae!(SAE);
    let a: f32x16 = a.as_f32x16();
    let b: f32x16 = b.as_f32x16();
    let c: i32x16 = c.as_i32x16();
    let r: f32x16 = vfixupimmps(a, b, c, IMM8, k, SAE);
    transmute(r)
}

/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.\
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_round_ps&expand=2507)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(4, 5)]
pub unsafe fn _mm512_maskz_fixupimm_round_ps<const IMM8: i32, const SAE: i32>(
    k: __mmask16,
    a: __m512,
    b: __m512,
    c: __m512i,
) -> __m512 {
    static_assert_uimm_bits!(IMM8, 8);
    static_assert_mantissas_sae!(SAE);
    let a: f32x16 = a.as_f32x16();
    let b: f32x16 = b.as_f32x16();
    let c: i32x16 = c.as_i32x16();
    let r: f32x16 = vfixupimmpsz(a, b, c, IMM8, k, SAE);
    transmute(r)
}

/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.\
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_round_pd&expand=2502)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(3, 4)]
pub unsafe fn _mm512_fixupimm_round_pd<const IMM8: i32, const SAE: i32>(
    a: __m512d,
    b: __m512d,
    c: __m512i,
) -> __m512d {
    static_assert_uimm_bits!(IMM8, 8);
    static_assert_mantissas_sae!(SAE);
    let a: f64x8 = a.as_f64x8();
    let b: f64x8 = b.as_f64x8();
    let c: i64x8 = c.as_i64x8();
    let r: f64x8 = vfixupimmpd(a, b, c, IMM8, 0b11111111, SAE);
    transmute(r)
}

/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.\
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_round_pd&expand=2503)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(4, 5)]
pub unsafe fn _mm512_mask_fixupimm_round_pd<const IMM8: i32, const SAE: i32>(
    a: __m512d,
    k: __mmask8,
    b: __m512d,
    c: __m512i,
) -> __m512d {
    static_assert_uimm_bits!(IMM8, 8);
    static_assert_mantissas_sae!(SAE);
    let a: f64x8 = a.as_f64x8();
    let b: f64x8 = b.as_f64x8();
    let c: i64x8 = c.as_i64x8();
    let r: f64x8 = vfixupimmpd(a, b, c, IMM8, k, SAE);
    transmute(r)
}

/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.\
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_round_pd&expand=2504)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(4, 5)]
pub unsafe fn _mm512_maskz_fixupimm_round_pd<const IMM8: i32, const SAE: i32>(
    k: __mmask8,
    a: __m512d,
    b: __m512d,
    c: __m512i,
) -> __m512d {
    static_assert_uimm_bits!(IMM8, 8);
    static_assert_mantissas_sae!(SAE);
    let a: f64x8 = a.as_f64x8();
    let b: f64x8 = b.as_f64x8();
    let c: i64x8 = c.as_i64x8();
    let r: f64x8 = vfixupimmpdz(a, b, c, IMM8, k, SAE);
    transmute(r)
}

/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
/// _MM_MANT_NORM_1_2 // interval [1, 2)\
/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
/// _MM_MANT_SIGN_src // sign = sign(src)\
/// _MM_MANT_SIGN_zero // sign = 0\
/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_round_ps&expand=2886)
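///
/// A hedged sketch (hypothetical values, not compiled here; assumes a nightly toolchain
/// with `stdarch_x86_avx512` and an AVX-512F capable CPU). Normalizing to the interval
/// [1, 2) with the source sign extracts the significand:
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_ps(10.0);
///     // 10.0 = 1.25 * 2^3, so every lane of `r` is 1.25.
///     let r = _mm512_getmant_round_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_src, _MM_FROUND_NO_EXC>(a);
/// }
/// ```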
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0, SAE = 4))]
#[rustc_legacy_const_generics(1, 2, 3)]
pub unsafe fn _mm512_getmant_round_ps<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
    const SAE: i32,
>(
    a: __m512,
) -> __m512 {
    static_assert_uimm_bits!(NORM, 4);
    static_assert_uimm_bits!(SIGN, 2);
    static_assert_mantissas_sae!(SAE);
    let a: f32x16 = a.as_f32x16();
    let zero: f32x16 = _mm512_setzero_ps().as_f32x16();
    let r: f32x16 = vgetmantps(a, SIGN << 2 | NORM, zero, 0b11111111_11111111, SAE);
    transmute(r)
}

/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
/// _MM_MANT_NORM_1_2 // interval [1, 2)\
/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
/// _MM_MANT_SIGN_src // sign = sign(src)\
/// _MM_MANT_SIGN_zero // sign = 0\
/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_round_ps&expand=2887)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0, SAE = 4))]
#[rustc_legacy_const_generics(3, 4, 5)]
pub unsafe fn _mm512_mask_getmant_round_ps<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
    const SAE: i32,
>(
    src: __m512,
    k: __mmask16,
    a: __m512,
) -> __m512 {
    static_assert_uimm_bits!(NORM, 4);
    static_assert_uimm_bits!(SIGN, 2);
    static_assert_mantissas_sae!(SAE);
    let a: f32x16 = a.as_f32x16();
    let src: f32x16 = src.as_f32x16();
    let r: f32x16 = vgetmantps(a, SIGN << 2 | NORM, src, k, SAE);
    transmute(r)
}

/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
/// _MM_MANT_NORM_1_2 // interval [1, 2)\
/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
/// _MM_MANT_SIGN_src // sign = sign(src)\
/// _MM_MANT_SIGN_zero // sign = 0\
/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_round_ps&expand=2888)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0, SAE = 4))]
#[rustc_legacy_const_generics(2, 3, 4)]
pub unsafe fn _mm512_maskz_getmant_round_ps<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
    const SAE: i32,
>(
    k: __mmask16,
    a: __m512,
) -> __m512 {
    static_assert_uimm_bits!(NORM, 4);
    static_assert_uimm_bits!(SIGN, 2);
    static_assert_mantissas_sae!(SAE);
    let a: f32x16 = a.as_f32x16();
    let zero: f32x16 = _mm512_setzero_ps().as_f32x16();
    let r: f32x16 = vgetmantps(a, SIGN << 2 | NORM, zero, k, SAE);
    transmute(r)
}

/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
/// _MM_MANT_NORM_1_2 // interval [1, 2)\
/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
/// _MM_MANT_SIGN_src // sign = sign(src)\
/// _MM_MANT_SIGN_zero // sign = 0\
/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_round_pd&expand=2883)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0, SAE = 4))]
#[rustc_legacy_const_generics(1, 2, 3)]
pub unsafe fn _mm512_getmant_round_pd<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
    const SAE: i32,
>(
    a: __m512d,
) -> __m512d {
    static_assert_uimm_bits!(NORM, 4);
    static_assert_uimm_bits!(SIGN, 2);
    static_assert_mantissas_sae!(SAE);
    let a: f64x8 = a.as_f64x8();
    let zero: f64x8 = _mm512_setzero_pd().as_f64x8();
    let r: f64x8 = vgetmantpd(a, SIGN << 2 | NORM, zero, 0b11111111, SAE);
    transmute(r)
}

/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
/// _MM_MANT_NORM_1_2 // interval [1, 2)\
/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
/// _MM_MANT_SIGN_src // sign = sign(src)\
/// _MM_MANT_SIGN_zero // sign = 0\
/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_round_pd&expand=2884)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0, SAE = 4))]
#[rustc_legacy_const_generics(3, 4, 5)]
pub unsafe fn _mm512_mask_getmant_round_pd<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
    const SAE: i32,
>(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
) -> __m512d {
    static_assert_uimm_bits!(NORM, 4);
    static_assert_uimm_bits!(SIGN, 2);
    static_assert_mantissas_sae!(SAE);
    let a: f64x8 = a.as_f64x8();
    let src: f64x8 = src.as_f64x8();
    let r: f64x8 = vgetmantpd(a, SIGN << 2 | NORM, src, k, SAE);
    transmute(r)
}

/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
/// _MM_MANT_NORM_1_2 // interval [1, 2)\
/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
/// _MM_MANT_SIGN_src // sign = sign(src)\
/// _MM_MANT_SIGN_zero // sign = 0\
/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_round_pd&expand=2885)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0, SAE = 4))]
#[rustc_legacy_const_generics(2, 3, 4)]
pub unsafe fn _mm512_maskz_getmant_round_pd<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
    const SAE: i32,
>(
    k: __mmask8,
    a: __m512d,
) -> __m512d {
    static_assert_uimm_bits!(NORM, 4);
    static_assert_uimm_bits!(SIGN, 2);
    static_assert_mantissas_sae!(SAE);
    let a: f64x8 = a.as_f64x8();
    let zero: f64x8 = _mm512_setzero_pd().as_f64x8();
    let r: f64x8 = vgetmantpd(a, SIGN << 2 | NORM, zero, k, SAE);
    transmute(r)
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_epi32&expand=1737)
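///
/// A minimal usage sketch (illustrative only; assumes a nightly toolchain with
/// `#![feature(stdarch_x86_avx512)]` and AVX-512F available at runtime):
///
/// ```ignore
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm512_set1_ps(2.5);
///     // Uses the current MXCSR rounding mode; with the default
///     // round-to-nearest-even, 2.5 rounds to 2 in every lane.
///     let r = _mm512_cvtps_epi32(a);
///     assert_eq!(_mm512_cvtsi512_si32(r), 2);
/// }
/// ```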
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtps2dq))]
pub unsafe fn _mm512_cvtps_epi32(a: __m512) -> __m512i {
    transmute(vcvtps2dq(
        a.as_f32x16(),
        _mm512_setzero_si512().as_i32x16(),
        0b11111111_11111111,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_epi32&expand=1738)
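///
/// A minimal writemask sketch (illustrative only; assumes a nightly toolchain
/// with `#![feature(stdarch_x86_avx512)]` and AVX-512F):
///
/// ```ignore
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let src = _mm512_set1_epi32(-1);
///     let a = _mm512_set1_ps(3.0);
///     // Only lane 0 has its mask bit set, so only it is converted;
///     // the other 15 lanes keep the value from `src` (-1).
///     let r = _mm512_mask_cvtps_epi32(src, 0b0000_0000_0000_0001, a);
///     assert_eq!(_mm512_cvtsi512_si32(r), 3);
/// }
/// ```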
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtps2dq))]
pub unsafe fn _mm512_mask_cvtps_epi32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
    transmute(vcvtps2dq(
        a.as_f32x16(),
        src.as_i32x16(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_epi32&expand=1739)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtps2dq))]
pub unsafe fn _mm512_maskz_cvtps_epi32(k: __mmask16, a: __m512) -> __m512i {
    transmute(vcvtps2dq(
        a.as_f32x16(),
        _mm512_setzero_si512().as_i32x16(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_epi32&expand=1735)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtps2dq))]
pub unsafe fn _mm256_mask_cvtps_epi32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
    let convert: __m256i = _mm256_cvtps_epi32(a);
    transmute(simd_select_bitmask(k, convert.as_i32x8(), src.as_i32x8()))
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_epi32&expand=1736)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtps2dq))]
pub unsafe fn _mm256_maskz_cvtps_epi32(k: __mmask8, a: __m256) -> __m256i {
    let convert: __m256i = _mm256_cvtps_epi32(a);
    let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
    transmute(simd_select_bitmask(k, convert.as_i32x8(), zero))
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_epi32&expand=1732)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtps2dq))]
pub unsafe fn _mm_mask_cvtps_epi32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
    let convert: __m128i = _mm_cvtps_epi32(a);
    transmute(simd_select_bitmask(k, convert.as_i32x4(), src.as_i32x4()))
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_epi32&expand=1733)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtps2dq))]
pub unsafe fn _mm_maskz_cvtps_epi32(k: __mmask8, a: __m128) -> __m128i {
    let convert: __m128i = _mm_cvtps_epi32(a);
    let zero: i32x4 = _mm_setzero_si128().as_i32x4();
    transmute(simd_select_bitmask(k, convert.as_i32x4(), zero))
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_epu32&expand=1755)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtps2udq))]
pub unsafe fn _mm512_cvtps_epu32(a: __m512) -> __m512i {
    transmute(vcvtps2udq(
        a.as_f32x16(),
        _mm512_setzero_si512().as_u32x16(),
        0b11111111_11111111,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_epu32&expand=1756)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtps2udq))]
pub unsafe fn _mm512_mask_cvtps_epu32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
    transmute(vcvtps2udq(
        a.as_f32x16(),
        src.as_u32x16(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_epu32&expand=1757)
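///
/// A minimal zeromask sketch (illustrative only; assumes a nightly toolchain
/// with `#![feature(stdarch_x86_avx512)]` and AVX-512F):
///
/// ```ignore
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm512_set1_ps(7.0);
///     // The low eight mask bits are clear, so lanes 0..8 are zeroed while
///     // lanes 8..16 receive the unsigned conversion of 7.0.
///     let r = _mm512_maskz_cvtps_epu32(0b11111111_00000000, a);
///     assert_eq!(_mm512_cvtsi512_si32(r), 0);
/// }
/// ```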
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtps2udq))]
pub unsafe fn _mm512_maskz_cvtps_epu32(k: __mmask16, a: __m512) -> __m512i {
    transmute(vcvtps2udq(
        a.as_f32x16(),
        _mm512_setzero_si512().as_u32x16(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtps_epu32&expand=1752)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtps2udq))]
pub unsafe fn _mm256_cvtps_epu32(a: __m256) -> __m256i {
    transmute(vcvtps2udq256(
        a.as_f32x8(),
        _mm256_setzero_si256().as_u32x8(),
        0b11111111,
    ))
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_epu32&expand=1753)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtps2udq))]
pub unsafe fn _mm256_mask_cvtps_epu32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
    transmute(vcvtps2udq256(a.as_f32x8(), src.as_u32x8(), k))
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_epu32&expand=1754)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtps2udq))]
pub unsafe fn _mm256_maskz_cvtps_epu32(k: __mmask8, a: __m256) -> __m256i {
    transmute(vcvtps2udq256(
        a.as_f32x8(),
        _mm256_setzero_si256().as_u32x8(),
        k,
    ))
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_epu32&expand=1749)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtps2udq))]
pub unsafe fn _mm_cvtps_epu32(a: __m128) -> __m128i {
    transmute(vcvtps2udq128(
        a.as_f32x4(),
        _mm_setzero_si128().as_u32x4(),
        0b11111111,
    ))
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_epu32&expand=1750)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtps2udq))]
pub unsafe fn _mm_mask_cvtps_epu32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
    transmute(vcvtps2udq128(a.as_f32x4(), src.as_u32x4(), k))
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_epu32&expand=1751)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtps2udq))]
pub unsafe fn _mm_maskz_cvtps_epu32(k: __mmask8, a: __m128) -> __m128i {
    transmute(vcvtps2udq128(
        a.as_f32x4(),
        _mm_setzero_si128().as_u32x4(),
        k,
    ))
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_pd&expand=1769)
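///
/// A minimal usage sketch (illustrative only; assumes a nightly toolchain with
/// `#![feature(stdarch_x86_avx512)]` and AVX-512F):
///
/// ```ignore
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm256_set1_ps(1.5);
///     // Widens the eight f32 lanes of a __m256 into the eight f64 lanes
///     // of a __m512d; the conversion is exact.
///     let r = _mm512_cvtps_pd(a);
///     assert_eq!(_mm512_cvtsd_f64(r), 1.5);
/// }
/// ```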
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtps2pd))]
pub unsafe fn _mm512_cvtps_pd(a: __m256) -> __m512d {
    transmute(vcvtps2pd(
        a.as_f32x8(),
        _mm512_setzero_pd().as_f64x8(),
        0b11111111,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_pd&expand=1770)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtps2pd))]
pub unsafe fn _mm512_mask_cvtps_pd(src: __m512d, k: __mmask8, a: __m256) -> __m512d {
    transmute(vcvtps2pd(
        a.as_f32x8(),
        src.as_f64x8(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_pd&expand=1771)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtps2pd))]
pub unsafe fn _mm512_maskz_cvtps_pd(k: __mmask8, a: __m256) -> __m512d {
    transmute(vcvtps2pd(
        a.as_f32x8(),
        _mm512_setzero_pd().as_f64x8(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Performs element-by-element conversion of the lower half of packed single-precision (32-bit) floating-point elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpslo_pd&expand=1784)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtps2pd))]
pub unsafe fn _mm512_cvtpslo_pd(v2: __m512) -> __m512d {
    transmute(vcvtps2pd(
        _mm512_castps512_ps256(v2).as_f32x8(),
        _mm512_setzero_pd().as_f64x8(),
        0b11111111,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Performs element-by-element conversion of the lower half of packed single-precision (32-bit) floating-point elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpslo_pd&expand=1785)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtps2pd))]
pub unsafe fn _mm512_mask_cvtpslo_pd(src: __m512d, k: __mmask8, v2: __m512) -> __m512d {
    transmute(vcvtps2pd(
        _mm512_castps512_ps256(v2).as_f32x8(),
        src.as_f64x8(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_ps&expand=1712)
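///
/// A minimal usage sketch (illustrative only; assumes a nightly toolchain with
/// `#![feature(stdarch_x86_avx512)]` and AVX-512F):
///
/// ```ignore
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm512_set1_pd(0.25);
///     // Narrows the eight f64 lanes into a __m256 of eight f32 lanes,
///     // rounding according to the current rounding mode.
///     let r = _mm512_cvtpd_ps(a);
///     assert_eq!(_mm256_cvtss_f32(r), 0.25);
/// }
/// ```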
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtpd2ps))]
pub unsafe fn _mm512_cvtpd_ps(a: __m512d) -> __m256 {
    transmute(vcvtpd2ps(
        a.as_f64x8(),
        _mm256_setzero_ps().as_f32x8(),
        0b11111111,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_ps&expand=1713)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtpd2ps))]
pub unsafe fn _mm512_mask_cvtpd_ps(src: __m256, k: __mmask8, a: __m512d) -> __m256 {
    transmute(vcvtpd2ps(
        a.as_f64x8(),
        src.as_f32x8(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_ps&expand=1714)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtpd2ps))]
pub unsafe fn _mm512_maskz_cvtpd_ps(k: __mmask8, a: __m512d) -> __m256 {
    transmute(vcvtpd2ps(
        a.as_f64x8(),
        _mm256_setzero_ps().as_f32x8(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_ps&expand=1710)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtpd2ps))]
pub unsafe fn _mm256_mask_cvtpd_ps(src: __m128, k: __mmask8, a: __m256d) -> __m128 {
    let convert: __m128 = _mm256_cvtpd_ps(a);
    transmute(simd_select_bitmask(k, convert.as_f32x4(), src.as_f32x4()))
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_ps&expand=1711)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtpd2ps))]
pub unsafe fn _mm256_maskz_cvtpd_ps(k: __mmask8, a: __m256d) -> __m128 {
    let convert: __m128 = _mm256_cvtpd_ps(a);
    let zero: f32x4 = _mm_setzero_ps().as_f32x4();
    transmute(simd_select_bitmask(k, convert.as_f32x4(), zero))
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_ps&expand=1707)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtpd2ps))]
pub unsafe fn _mm_mask_cvtpd_ps(src: __m128, k: __mmask8, a: __m128d) -> __m128 {
    let convert: __m128 = _mm_cvtpd_ps(a);
    transmute(simd_select_bitmask(k, convert.as_f32x4(), src.as_f32x4()))
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_ps&expand=1708)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtpd2ps))]
pub unsafe fn _mm_maskz_cvtpd_ps(k: __mmask8, a: __m128d) -> __m128 {
    let convert: __m128 = _mm_cvtpd_ps(a);
    let zero: f32x4 = _mm_setzero_ps().as_f32x4();
    transmute(simd_select_bitmask(k, convert.as_f32x4(), zero))
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_epi32&expand=1675)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtpd2dq))]
pub unsafe fn _mm512_cvtpd_epi32(a: __m512d) -> __m256i {
    transmute(vcvtpd2dq(
        a.as_f64x8(),
        _mm256_setzero_si256().as_i32x8(),
        0b11111111,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_epi32&expand=1676)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtpd2dq))]
pub unsafe fn _mm512_mask_cvtpd_epi32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
    transmute(vcvtpd2dq(
        a.as_f64x8(),
        src.as_i32x8(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_epi32&expand=1677)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtpd2dq))]
pub unsafe fn _mm512_maskz_cvtpd_epi32(k: __mmask8, a: __m512d) -> __m256i {
    transmute(vcvtpd2dq(
        a.as_f64x8(),
        _mm256_setzero_si256().as_i32x8(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_epi32&expand=1673)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtpd2dq))]
pub unsafe fn _mm256_mask_cvtpd_epi32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
    let convert: __m128i = _mm256_cvtpd_epi32(a);
    transmute(simd_select_bitmask(k, convert.as_i32x4(), src.as_i32x4()))
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_epi32&expand=1674)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtpd2dq))]
pub unsafe fn _mm256_maskz_cvtpd_epi32(k: __mmask8, a: __m256d) -> __m128i {
    let convert: __m128i = _mm256_cvtpd_epi32(a);
    transmute(simd_select_bitmask(
        k,
        convert.as_i32x4(),
        _mm_setzero_si128().as_i32x4(),
    ))
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_epi32&expand=1670)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtpd2dq))]
pub unsafe fn _mm_mask_cvtpd_epi32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
    let convert: __m128i = _mm_cvtpd_epi32(a);
    transmute(simd_select_bitmask(k, convert.as_i32x4(), src.as_i32x4()))
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_epi32&expand=1671)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtpd2dq))]
pub unsafe fn _mm_maskz_cvtpd_epi32(k: __mmask8, a: __m128d) -> __m128i {
    let convert: __m128i = _mm_cvtpd_epi32(a);
    transmute(simd_select_bitmask(
        k,
        convert.as_i32x4(),
        _mm_setzero_si128().as_i32x4(),
    ))
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_epu32&expand=1693)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtpd2udq))]
pub unsafe fn _mm512_cvtpd_epu32(a: __m512d) -> __m256i {
    transmute(vcvtpd2udq(
        a.as_f64x8(),
        _mm256_setzero_si256().as_u32x8(),
        0b11111111,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_epu32&expand=1694)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtpd2udq))]
pub unsafe fn _mm512_mask_cvtpd_epu32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
    transmute(vcvtpd2udq(
        a.as_f64x8(),
        src.as_u32x8(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_epu32&expand=1695)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtpd2udq))]
pub unsafe fn _mm512_maskz_cvtpd_epu32(k: __mmask8, a: __m512d) -> __m256i {
    transmute(vcvtpd2udq(
        a.as_f64x8(),
        _mm256_setzero_si256().as_u32x8(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtpd_epu32&expand=1690)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtpd2udq))]
pub unsafe fn _mm256_cvtpd_epu32(a: __m256d) -> __m128i {
    transmute(vcvtpd2udq256(
        a.as_f64x4(),
        _mm_setzero_si128().as_u32x4(),
        0b11111111,
    ))
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_epu32&expand=1691)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtpd2udq))]
pub unsafe fn _mm256_mask_cvtpd_epu32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
    transmute(vcvtpd2udq256(a.as_f64x4(), src.as_u32x4(), k))
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_epu32&expand=1692)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtpd2udq))]
pub unsafe fn _mm256_maskz_cvtpd_epu32(k: __mmask8, a: __m256d) -> __m128i {
    transmute(vcvtpd2udq256(
        a.as_f64x4(),
        _mm_setzero_si128().as_u32x4(),
        k,
    ))
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpd_epu32&expand=1687)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtpd2udq))]
pub unsafe fn _mm_cvtpd_epu32(a: __m128d) -> __m128i {
    transmute(vcvtpd2udq128(
        a.as_f64x2(),
        _mm_setzero_si128().as_u32x4(),
        0b11111111,
    ))
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_epu32&expand=1688)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtpd2udq))]
pub unsafe fn _mm_mask_cvtpd_epu32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
    transmute(vcvtpd2udq128(a.as_f64x2(), src.as_u32x4(), k))
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_epu32&expand=1689)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtpd2udq))]
pub unsafe fn _mm_maskz_cvtpd_epu32(k: __mmask8, a: __m128d) -> __m128i {
    transmute(vcvtpd2udq128(
        a.as_f64x2(),
        _mm_setzero_si128().as_u32x4(),
        k,
    ))
}

/// Performs an element-by-element conversion of packed double-precision (64-bit) floating-point elements in v2 to single-precision (32-bit) floating-point elements and stores them in dst. The elements are stored in the lower half of the results vector, while the remaining upper half locations are set to 0.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_pslo&expand=1715)
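///
/// A minimal usage sketch (illustrative only; assumes a nightly toolchain with
/// `#![feature(stdarch_x86_avx512)]` and AVX-512F):
///
/// ```ignore
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let v2 = _mm512_set1_pd(4.0);
///     // The eight converted f32 values land in the lower eight lanes of the
///     // __m512 result; the upper eight lanes are zeroed.
///     let r = _mm512_cvtpd_pslo(v2);
///     assert_eq!(_mm512_cvtss_f32(r), 4.0);
/// }
/// ```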
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtpd2ps))]
pub unsafe fn _mm512_cvtpd_pslo(v2: __m512d) -> __m512 {
    let r: f32x8 = vcvtpd2ps(
        v2.as_f64x8(),
        _mm256_setzero_ps().as_f32x8(),
        0b11111111,
        _MM_FROUND_CUR_DIRECTION,
    );
    simd_shuffle!(
        r,
        _mm256_setzero_ps().as_f32x8(),
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
    )
}

/// Performs an element-by-element conversion of packed double-precision (64-bit) floating-point elements in v2 to single-precision (32-bit) floating-point elements and stores them in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The elements are stored in the lower half of the results vector, while the remaining upper half locations are set to 0.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_pslo&expand=1716)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtpd2ps))]
pub unsafe fn _mm512_mask_cvtpd_pslo(src: __m512, k: __mmask8, v2: __m512d) -> __m512 {
    let r: f32x8 = vcvtpd2ps(
        v2.as_f64x8(),
        _mm512_castps512_ps256(src).as_f32x8(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    );
    simd_shuffle!(
        r,
        _mm256_setzero_ps().as_f32x8(),
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
    )
}

/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi8_epi32&expand=1535)
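///
/// A minimal usage sketch (illustrative only; assumes a nightly toolchain with
/// `#![feature(stdarch_x86_avx512)]` and AVX-512F):
///
/// ```ignore
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm_set1_epi8(-5);
///     // Each of the 16 signed bytes is sign-extended into a 32-bit lane.
///     let r = _mm512_cvtepi8_epi32(a);
///     assert_eq!(_mm512_cvtsi512_si32(r), -5);
/// }
/// ```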
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxbd))]
pub unsafe fn _mm512_cvtepi8_epi32(a: __m128i) -> __m512i {
    let a: i8x16 = a.as_i8x16();
    transmute::<i32x16, _>(simd_cast(a))
}

/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi8_epi32&expand=1536)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxbd))]
pub unsafe fn _mm512_mask_cvtepi8_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
    let convert: i32x16 = _mm512_cvtepi8_epi32(a).as_i32x16();
    transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
}

/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi8_epi32&expand=1537)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxbd))]
pub unsafe fn _mm512_maskz_cvtepi8_epi32(k: __mmask16, a: __m128i) -> __m512i {
    let convert: i32x16 = _mm512_cvtepi8_epi32(a).as_i32x16();
    let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, convert, zero))
}

/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi8_epi32&expand=1533)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxbd))]
pub unsafe fn _mm256_mask_cvtepi8_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
    let convert: i32x8 = _mm256_cvtepi8_epi32(a).as_i32x8();
    transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
}

/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi8_epi32&expand=1534)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxbd))]
pub unsafe fn _mm256_maskz_cvtepi8_epi32(k: __mmask8, a: __m128i) -> __m256i {
    let convert: i32x8 = _mm256_cvtepi8_epi32(a).as_i32x8();
    let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
    transmute(simd_select_bitmask(k, convert, zero))
}

/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi8_epi32&expand=1530)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxbd))]
pub unsafe fn _mm_mask_cvtepi8_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    let convert: i32x4 = _mm_cvtepi8_epi32(a).as_i32x4();
    transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
}

/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi8_epi32&expand=1531)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxbd))]
pub unsafe fn _mm_maskz_cvtepi8_epi32(k: __mmask8, a: __m128i) -> __m128i {
    let convert: i32x4 = _mm_cvtepi8_epi32(a).as_i32x4();
    let zero: i32x4 = _mm_setzero_si128().as_i32x4();
    transmute(simd_select_bitmask(k, convert, zero))
}

/// Sign extend packed 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi8_epi64&expand=1544)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxbq))]
pub unsafe fn _mm512_cvtepi8_epi64(a: __m128i) -> __m512i {
    let a: i8x16 = a.as_i8x16();
    let v64: i8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
    transmute::<i64x8, _>(simd_cast(v64))
}

/// Sign extend packed 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi8_epi64&expand=1545)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxbq))]
pub unsafe fn _mm512_mask_cvtepi8_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
    let convert: i64x8 = _mm512_cvtepi8_epi64(a).as_i64x8();
    transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
}

/// Sign extend packed 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi8_epi64&expand=1546)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxbq))]
pub unsafe fn _mm512_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m512i {
    let convert: i64x8 = _mm512_cvtepi8_epi64(a).as_i64x8();
    let zero: i64x8 = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, convert, zero))
}

/// Sign extend packed 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi8_epi64&expand=1542)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxbq))]
pub unsafe fn _mm256_mask_cvtepi8_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
    let convert: i64x4 = _mm256_cvtepi8_epi64(a).as_i64x4();
    transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
}

/// Sign extend packed 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi8_epi64&expand=1543)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxbq))]
pub unsafe fn _mm256_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m256i {
    let convert: i64x4 = _mm256_cvtepi8_epi64(a).as_i64x4();
    let zero: i64x4 = _mm256_setzero_si256().as_i64x4();
    transmute(simd_select_bitmask(k, convert, zero))
}

/// Sign extend packed 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi8_epi64&expand=1539)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxbq))]
pub unsafe fn _mm_mask_cvtepi8_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    let convert: i64x2 = _mm_cvtepi8_epi64(a).as_i64x2();
    transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
}

/// Sign extend packed 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi8_epi64&expand=1540)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxbq))]
pub unsafe fn _mm_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m128i {
    let convert: i64x2 = _mm_cvtepi8_epi64(a).as_i64x2();
    let zero: i64x2 = _mm_setzero_si128().as_i64x2();
    transmute(simd_select_bitmask(k, convert, zero))
}

/// Zero extend packed unsigned 8-bit integers in a to packed 32-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu8_epi32&expand=1621)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxbd))]
pub unsafe fn _mm512_cvtepu8_epi32(a: __m128i) -> __m512i {
    let a: u8x16 = a.as_u8x16();
    transmute::<i32x16, _>(simd_cast(a))
}

/// Zero extend packed unsigned 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu8_epi32&expand=1622)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxbd))]
pub unsafe fn _mm512_mask_cvtepu8_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
    let convert: i32x16 = _mm512_cvtepu8_epi32(a).as_i32x16();
    transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
}

/// Zero extend packed unsigned 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu8_epi32&expand=1623)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxbd))]
pub unsafe fn _mm512_maskz_cvtepu8_epi32(k: __mmask16, a: __m128i) -> __m512i {
    let convert: i32x16 = _mm512_cvtepu8_epi32(a).as_i32x16();
    let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, convert, zero))
}

/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu8_epi32&expand=1619)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxbd))]
pub unsafe fn _mm256_mask_cvtepu8_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
    let convert: i32x8 = _mm256_cvtepu8_epi32(a).as_i32x8();
    transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
}

/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu8_epi32&expand=1620)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxbd))]
pub unsafe fn _mm256_maskz_cvtepu8_epi32(k: __mmask8, a: __m128i) -> __m256i {
    let convert: i32x8 = _mm256_cvtepu8_epi32(a).as_i32x8();
    let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
    transmute(simd_select_bitmask(k, convert, zero))
}

/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu8_epi32&expand=1616)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxbd))]
pub unsafe fn _mm_mask_cvtepu8_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    let convert: i32x4 = _mm_cvtepu8_epi32(a).as_i32x4();
    transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
}

/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu8_epi32&expand=1617)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxbd))]
pub unsafe fn _mm_maskz_cvtepu8_epi32(k: __mmask8, a: __m128i) -> __m128i {
    let convert: i32x4 = _mm_cvtepu8_epi32(a).as_i32x4();
    let zero: i32x4 = _mm_setzero_si128().as_i32x4();
    transmute(simd_select_bitmask(k, convert, zero))
}

/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu8_epi64&expand=1630)
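///
/// A minimal usage sketch (illustrative only; assumes a nightly toolchain with
/// `#![feature(stdarch_x86_avx512)]` and AVX-512F):
///
/// ```ignore
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm_set1_epi8(-1);
///     // Only the low 8 bytes are used; the byte 0xFF zero-extends to 255
///     // in each of the eight 64-bit lanes.
///     let r = _mm512_cvtepu8_epi64(a);
///     assert_eq!(_mm512_cmpeq_epi64_mask(r, _mm512_set1_epi64(255)), 0xff);
/// }
/// ```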
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxbq))]
pub unsafe fn _mm512_cvtepu8_epi64(a: __m128i) -> __m512i {
    let a: u8x16 = a.as_u8x16();
    let v64: u8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
    transmute::<i64x8, _>(simd_cast(v64))
}

/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu8_epi64&expand=1631)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxbq))]
pub unsafe fn _mm512_mask_cvtepu8_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
    let convert: i64x8 = _mm512_cvtepu8_epi64(a).as_i64x8();
    transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
}

/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu8_epi64&expand=1632)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxbq))]
pub unsafe fn _mm512_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m512i {
    let convert: i64x8 = _mm512_cvtepu8_epi64(a).as_i64x8();
    let zero: i64x8 = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, convert, zero))
}

/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu8_epi64&expand=1628)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxbq))]
pub unsafe fn _mm256_mask_cvtepu8_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
    let convert: i64x4 = _mm256_cvtepu8_epi64(a).as_i64x4();
    transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
}

/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu8_epi64&expand=1629)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxbq))]
pub unsafe fn _mm256_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m256i {
    let convert: i64x4 = _mm256_cvtepu8_epi64(a).as_i64x4();
    let zero: i64x4 = _mm256_setzero_si256().as_i64x4();
    transmute(simd_select_bitmask(k, convert, zero))
}

/// Zero extend packed unsigned 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu8_epi64&expand=1625)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxbq))]
pub unsafe fn _mm_mask_cvtepu8_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    let convert: i64x2 = _mm_cvtepu8_epi64(a).as_i64x2();
    transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
}

/// Zero extend packed unsigned 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu8_epi64&expand=1626)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxbq))]
pub unsafe fn _mm_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m128i {
    let convert: i64x2 = _mm_cvtepu8_epi64(a).as_i64x2();
    let zero: i64x2 = _mm_setzero_si128().as_i64x2();
    transmute(simd_select_bitmask(k, convert, zero))
}

11618/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst.
11619///
11620/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi16_epi32&expand=1389)
11621#[inline]
11622#[target_feature(enable = "avx512f")]
11623#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11624#[cfg_attr(test, assert_instr(vpmovsxwd))]
11625pub unsafe fn _mm512_cvtepi16_epi32(a: __m256i) -> __m512i {
11626 let a: i16x16 = a.as_i16x16();
11627 transmute::<i32x16, _>(src:simd_cast(a))
11628}
11629
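// Illustrative sketch (not part of the library): sign extension replicates the
// sign bit, so negative inputs stay negative after widening.
//
//     let words = _mm256_set1_epi16(-3);
//     let dwords = unsafe { _mm512_cvtepi16_epi32(words) };  // every i32 lane is -3
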
/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi16_epi32&expand=1390)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxwd))]
pub unsafe fn _mm512_mask_cvtepi16_epi32(src: __m512i, k: __mmask16, a: __m256i) -> __m512i {
    let convert: i32x16 = _mm512_cvtepi16_epi32(a).as_i32x16();
    transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
}

/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi16_epi32&expand=1391)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxwd))]
pub unsafe fn _mm512_maskz_cvtepi16_epi32(k: __mmask16, a: __m256i) -> __m512i {
    let convert: i32x16 = _mm512_cvtepi16_epi32(a).as_i32x16();
    let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, convert, zero))
}

/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi16_epi32&expand=1387)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxwd))]
pub unsafe fn _mm256_mask_cvtepi16_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
    let convert: i32x8 = _mm256_cvtepi16_epi32(a).as_i32x8();
    transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
}

/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi16_epi32&expand=1388)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxwd))]
pub unsafe fn _mm256_maskz_cvtepi16_epi32(k: __mmask8, a: __m128i) -> __m256i {
    let convert: i32x8 = _mm256_cvtepi16_epi32(a).as_i32x8();
    let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
    transmute(simd_select_bitmask(k, convert, zero))
}

/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi16_epi32&expand=1384)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxwd))]
pub unsafe fn _mm_mask_cvtepi16_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    let convert: i32x4 = _mm_cvtepi16_epi32(a).as_i32x4();
    transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
}

/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi16_epi32&expand=1385)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxwd))]
pub unsafe fn _mm_maskz_cvtepi16_epi32(k: __mmask8, a: __m128i) -> __m128i {
    let convert: i32x4 = _mm_cvtepi16_epi32(a).as_i32x4();
    let zero: i32x4 = _mm_setzero_si128().as_i32x4();
    transmute(simd_select_bitmask(k, convert, zero))
}

/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi16_epi64&expand=1398)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxwq))]
pub unsafe fn _mm512_cvtepi16_epi64(a: __m128i) -> __m512i {
    let a: i16x8 = a.as_i16x8();
    transmute::<i64x8, _>(simd_cast(a))
}

/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi16_epi64&expand=1399)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxwq))]
pub unsafe fn _mm512_mask_cvtepi16_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
    let convert: i64x8 = _mm512_cvtepi16_epi64(a).as_i64x8();
    transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
}

/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi16_epi64&expand=1400)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxwq))]
pub unsafe fn _mm512_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m512i {
    let convert: i64x8 = _mm512_cvtepi16_epi64(a).as_i64x8();
    let zero: i64x8 = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, convert, zero))
}

/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi16_epi64&expand=1396)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxwq))]
pub unsafe fn _mm256_mask_cvtepi16_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
    let convert: i64x4 = _mm256_cvtepi16_epi64(a).as_i64x4();
    transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
}

/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi16_epi64&expand=1397)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxwq))]
pub unsafe fn _mm256_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m256i {
    let convert: i64x4 = _mm256_cvtepi16_epi64(a).as_i64x4();
    let zero: i64x4 = _mm256_setzero_si256().as_i64x4();
    transmute(simd_select_bitmask(k, convert, zero))
}

/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi16_epi64&expand=1393)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxwq))]
pub unsafe fn _mm_mask_cvtepi16_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    let convert: i64x2 = _mm_cvtepi16_epi64(a).as_i64x2();
    transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
}

/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi16_epi64&expand=1394)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxwq))]
pub unsafe fn _mm_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m128i {
    let convert: i64x2 = _mm_cvtepi16_epi64(a).as_i64x2();
    let zero: i64x2 = _mm_setzero_si128().as_i64x2();
    transmute(simd_select_bitmask(k, convert, zero))
}

/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu16_epi32&expand=1553)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxwd))]
pub unsafe fn _mm512_cvtepu16_epi32(a: __m256i) -> __m512i {
    let a: u16x16 = a.as_u16x16();
    transmute::<i32x16, _>(simd_cast(a))
}

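// Illustrative sketch (not part of the library) of how the unsigned conversion
// differs from the signed one above for the same bit pattern:
//
//     let words = _mm256_set1_epi16(-1);                 // 0xFFFF in every lane
//     let s = unsafe { _mm512_cvtepi16_epi32(words) };   // every i32 lane is -1
//     let u = unsafe { _mm512_cvtepu16_epi32(words) };   // every i32 lane is 65535
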
/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu16_epi32&expand=1554)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxwd))]
pub unsafe fn _mm512_mask_cvtepu16_epi32(src: __m512i, k: __mmask16, a: __m256i) -> __m512i {
    let convert: i32x16 = _mm512_cvtepu16_epi32(a).as_i32x16();
    transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
}

/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu16_epi32&expand=1555)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxwd))]
pub unsafe fn _mm512_maskz_cvtepu16_epi32(k: __mmask16, a: __m256i) -> __m512i {
    let convert: i32x16 = _mm512_cvtepu16_epi32(a).as_i32x16();
    let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, convert, zero))
}

/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu16_epi32&expand=1551)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxwd))]
pub unsafe fn _mm256_mask_cvtepu16_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
    let convert: i32x8 = _mm256_cvtepu16_epi32(a).as_i32x8();
    transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
}

/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu16_epi32&expand=1552)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxwd))]
pub unsafe fn _mm256_maskz_cvtepu16_epi32(k: __mmask8, a: __m128i) -> __m256i {
    let convert: i32x8 = _mm256_cvtepu16_epi32(a).as_i32x8();
    let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
    transmute(simd_select_bitmask(k, convert, zero))
}

/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu16_epi32&expand=1548)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxwd))]
pub unsafe fn _mm_mask_cvtepu16_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    let convert: i32x4 = _mm_cvtepu16_epi32(a).as_i32x4();
    transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
}

/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu16_epi32&expand=1549)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxwd))]
pub unsafe fn _mm_maskz_cvtepu16_epi32(k: __mmask8, a: __m128i) -> __m128i {
    let convert: i32x4 = _mm_cvtepu16_epi32(a).as_i32x4();
    let zero: i32x4 = _mm_setzero_si128().as_i32x4();
    transmute(simd_select_bitmask(k, convert, zero))
}

/// Zero extend packed unsigned 16-bit integers in a to packed 64-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu16_epi64&expand=1562)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxwq))]
pub unsafe fn _mm512_cvtepu16_epi64(a: __m128i) -> __m512i {
    let a: u16x8 = a.as_u16x8();
    transmute::<i64x8, _>(simd_cast(a))
}

/// Zero extend packed unsigned 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu16_epi64&expand=1563)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxwq))]
pub unsafe fn _mm512_mask_cvtepu16_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
    let convert: i64x8 = _mm512_cvtepu16_epi64(a).as_i64x8();
    transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
}

/// Zero extend packed unsigned 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu16_epi64&expand=1564)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxwq))]
pub unsafe fn _mm512_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m512i {
    let convert: i64x8 = _mm512_cvtepu16_epi64(a).as_i64x8();
    let zero: i64x8 = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, convert, zero))
}

/// Zero extend packed unsigned 16-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu16_epi64&expand=1560)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxwq))]
pub unsafe fn _mm256_mask_cvtepu16_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
    let convert: i64x4 = _mm256_cvtepu16_epi64(a).as_i64x4();
    transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
}

/// Zero extend packed unsigned 16-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu16_epi64&expand=1561)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxwq))]
pub unsafe fn _mm256_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m256i {
    let convert: i64x4 = _mm256_cvtepu16_epi64(a).as_i64x4();
    let zero: i64x4 = _mm256_setzero_si256().as_i64x4();
    transmute(simd_select_bitmask(k, convert, zero))
}

/// Zero extend packed unsigned 16-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu16_epi64&expand=1557)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxwq))]
pub unsafe fn _mm_mask_cvtepu16_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    let convert: i64x2 = _mm_cvtepu16_epi64(a).as_i64x2();
    transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
}

/// Zero extend packed unsigned 16-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu16_epi64&expand=1558)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxwq))]
pub unsafe fn _mm_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m128i {
    let convert: i64x2 = _mm_cvtepu16_epi64(a).as_i64x2();
    let zero: i64x2 = _mm_setzero_si128().as_i64x2();
    transmute(simd_select_bitmask(k, convert, zero))
}

/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_epi64&expand=1428)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxdq))]
pub unsafe fn _mm512_cvtepi32_epi64(a: __m256i) -> __m512i {
    let a: i32x8 = a.as_i32x8();
    transmute::<i64x8, _>(simd_cast(a))
}

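// Illustrative sketch (not part of the library): widening to 64 bits before doing
// arithmetic avoids 32-bit overflow, and the sign is preserved.
//
//     let v = _mm256_set1_epi32(i32::MIN);
//     let w = unsafe { _mm512_cvtepi32_epi64(v) };  // every i64 lane is -2147483648
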
/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_epi64&expand=1429)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxdq))]
pub unsafe fn _mm512_mask_cvtepi32_epi64(src: __m512i, k: __mmask8, a: __m256i) -> __m512i {
    let convert: i64x8 = _mm512_cvtepi32_epi64(a).as_i64x8();
    transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
}

/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_epi64&expand=1430)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxdq))]
pub unsafe fn _mm512_maskz_cvtepi32_epi64(k: __mmask8, a: __m256i) -> __m512i {
    let convert: i64x8 = _mm512_cvtepi32_epi64(a).as_i64x8();
    let zero: i64x8 = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, convert, zero))
}

/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_epi64&expand=1426)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxdq))]
pub unsafe fn _mm256_mask_cvtepi32_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
    let convert: i64x4 = _mm256_cvtepi32_epi64(a).as_i64x4();
    transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
}

/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_epi64&expand=1427)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxdq))]
pub unsafe fn _mm256_maskz_cvtepi32_epi64(k: __mmask8, a: __m128i) -> __m256i {
    let convert: i64x4 = _mm256_cvtepi32_epi64(a).as_i64x4();
    let zero: i64x4 = _mm256_setzero_si256().as_i64x4();
    transmute(simd_select_bitmask(k, convert, zero))
}

/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_epi64&expand=1423)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxdq))]
pub unsafe fn _mm_mask_cvtepi32_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    let convert: i64x2 = _mm_cvtepi32_epi64(a).as_i64x2();
    transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
}

/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_epi64&expand=1424)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxdq))]
pub unsafe fn _mm_maskz_cvtepi32_epi64(k: __mmask8, a: __m128i) -> __m128i {
    let convert: i64x2 = _mm_cvtepi32_epi64(a).as_i64x2();
    let zero: i64x2 = _mm_setzero_si128().as_i64x2();
    transmute(simd_select_bitmask(k, convert, zero))
}

/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32_epi64&expand=1571)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxdq))]
pub unsafe fn _mm512_cvtepu32_epi64(a: __m256i) -> __m512i {
    let a: u32x8 = a.as_u32x8();
    transmute::<i64x8, _>(simd_cast(a))
}

/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32_epi64&expand=1572)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxdq))]
pub unsafe fn _mm512_mask_cvtepu32_epi64(src: __m512i, k: __mmask8, a: __m256i) -> __m512i {
    let convert: i64x8 = _mm512_cvtepu32_epi64(a).as_i64x8();
    transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
}

/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu32_epi64&expand=1573)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxdq))]
pub unsafe fn _mm512_maskz_cvtepu32_epi64(k: __mmask8, a: __m256i) -> __m512i {
    let convert: i64x8 = _mm512_cvtepu32_epi64(a).as_i64x8();
    let zero: i64x8 = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, convert, zero))
}

/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu32_epi64&expand=1569)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxdq))]
pub unsafe fn _mm256_mask_cvtepu32_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
    let convert: i64x4 = _mm256_cvtepu32_epi64(a).as_i64x4();
    transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
}

/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu32_epi64&expand=1570)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxdq))]
pub unsafe fn _mm256_maskz_cvtepu32_epi64(k: __mmask8, a: __m128i) -> __m256i {
    let convert: i64x4 = _mm256_cvtepu32_epi64(a).as_i64x4();
    let zero: i64x4 = _mm256_setzero_si256().as_i64x4();
    transmute(simd_select_bitmask(k, convert, zero))
}

/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu32_epi64&expand=1566)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxdq))]
pub unsafe fn _mm_mask_cvtepu32_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    let convert: i64x2 = _mm_cvtepu32_epi64(a).as_i64x2();
    transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
}

/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu32_epi64&expand=1567)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxdq))]
pub unsafe fn _mm_maskz_cvtepu32_epi64(k: __mmask8, a: __m128i) -> __m128i {
    let convert: i64x2 = _mm_cvtepu32_epi64(a).as_i64x2();
    let zero: i64x2 = _mm_setzero_si128().as_i64x2();
    transmute(simd_select_bitmask(k, convert, zero))
}

/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_ps&expand=1455)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtdq2ps))]
pub unsafe fn _mm512_cvtepi32_ps(a: __m512i) -> __m512 {
    let a: i32x16 = a.as_i32x16();
    transmute::<f32x16, _>(simd_cast(a))
}

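// Illustrative sketch (not part of the library): f32 has a 24-bit significand, so
// 32-bit integers with a magnitude above 2^24 may be rounded by this conversion.
//
//     let v = _mm512_set1_epi32(16_777_217);             // 2^24 + 1
//     let f = unsafe { _mm512_cvtepi32_ps(v) };          // rounds to 16777216.0
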
/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_ps&expand=1456)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtdq2ps))]
pub unsafe fn _mm512_mask_cvtepi32_ps(src: __m512, k: __mmask16, a: __m512i) -> __m512 {
    let convert: f32x16 = _mm512_cvtepi32_ps(a).as_f32x16();
    transmute(simd_select_bitmask(k, convert, src.as_f32x16()))
}

/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_ps&expand=1457)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtdq2ps))]
pub unsafe fn _mm512_maskz_cvtepi32_ps(k: __mmask16, a: __m512i) -> __m512 {
    let convert: f32x16 = _mm512_cvtepi32_ps(a).as_f32x16();
    let zero: f32x16 = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, convert, zero))
}

/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_ps&expand=1453)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtdq2ps))]
pub unsafe fn _mm256_mask_cvtepi32_ps(src: __m256, k: __mmask8, a: __m256i) -> __m256 {
    let convert: f32x8 = _mm256_cvtepi32_ps(a).as_f32x8();
    transmute(simd_select_bitmask(k, convert, src.as_f32x8()))
}

/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_ps&expand=1454)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtdq2ps))]
pub unsafe fn _mm256_maskz_cvtepi32_ps(k: __mmask8, a: __m256i) -> __m256 {
    let convert: f32x8 = _mm256_cvtepi32_ps(a).as_f32x8();
    let zero: f32x8 = _mm256_setzero_ps().as_f32x8();
    transmute(simd_select_bitmask(k, convert, zero))
}

/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_ps&expand=1450)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtdq2ps))]
pub unsafe fn _mm_mask_cvtepi32_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 {
    let convert: f32x4 = _mm_cvtepi32_ps(a).as_f32x4();
    transmute(simd_select_bitmask(k, convert, src.as_f32x4()))
}

/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_ps&expand=1451)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtdq2ps))]
pub unsafe fn _mm_maskz_cvtepi32_ps(k: __mmask8, a: __m128i) -> __m128 {
    let convert: f32x4 = _mm_cvtepi32_ps(a).as_f32x4();
    let zero: f32x4 = _mm_setzero_ps().as_f32x4();
    transmute(simd_select_bitmask(k, convert, zero))
}

/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_pd&expand=1446)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
pub unsafe fn _mm512_cvtepi32_pd(a: __m256i) -> __m512d {
    let a: i32x8 = a.as_i32x8();
    transmute::<f64x8, _>(simd_cast(a))
}

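// Illustrative sketch (not part of the library): f64 has a 53-bit significand, so
// every 32-bit integer converts exactly.
//
//     let v = _mm256_set1_epi32(i32::MAX);
//     let d = unsafe { _mm512_cvtepi32_pd(v) };  // every f64 lane is exactly 2147483647.0
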
/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_pd&expand=1447)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
pub unsafe fn _mm512_mask_cvtepi32_pd(src: __m512d, k: __mmask8, a: __m256i) -> __m512d {
    let convert: f64x8 = _mm512_cvtepi32_pd(a).as_f64x8();
    transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
}

/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_pd&expand=1448)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
pub unsafe fn _mm512_maskz_cvtepi32_pd(k: __mmask8, a: __m256i) -> __m512d {
    let convert: f64x8 = _mm512_cvtepi32_pd(a).as_f64x8();
    let zero: f64x8 = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, convert, zero))
}

/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_pd&expand=1444)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
pub unsafe fn _mm256_mask_cvtepi32_pd(src: __m256d, k: __mmask8, a: __m128i) -> __m256d {
    let convert: f64x4 = _mm256_cvtepi32_pd(a).as_f64x4();
    transmute(simd_select_bitmask(k, convert, src.as_f64x4()))
}

/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_pd&expand=1445)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
pub unsafe fn _mm256_maskz_cvtepi32_pd(k: __mmask8, a: __m128i) -> __m256d {
    let convert: f64x4 = _mm256_cvtepi32_pd(a).as_f64x4();
    let zero: f64x4 = _mm256_setzero_pd().as_f64x4();
    transmute(simd_select_bitmask(k, convert, zero))
}

/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_pd&expand=1441)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
pub unsafe fn _mm_mask_cvtepi32_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d {
    let convert: f64x2 = _mm_cvtepi32_pd(a).as_f64x2();
    transmute(simd_select_bitmask(k, convert, src.as_f64x2()))
}

/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_pd&expand=1442)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
pub unsafe fn _mm_maskz_cvtepi32_pd(k: __mmask8, a: __m128i) -> __m128d {
    let convert: f64x2 = _mm_cvtepi32_pd(a).as_f64x2();
    let zero: f64x2 = _mm_setzero_pd().as_f64x2();
    transmute(simd_select_bitmask(k, convert, zero))
}

/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32_ps&expand=1583)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtudq2ps))]
pub unsafe fn _mm512_cvtepu32_ps(a: __m512i) -> __m512 {
    let a: u32x16 = a.as_u32x16();
    transmute::<f32x16, _>(simd_cast(a))
}

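// Illustrative sketch (not part of the library): the input lanes are interpreted
// as unsigned, so an all-ones bit pattern converts to roughly 4.29e9, not -1.0.
//
//     let v = _mm512_set1_epi32(-1);                     // bit pattern 0xFFFF_FFFF
//     let f = unsafe { _mm512_cvtepu32_ps(v) };          // every f32 lane rounds to 4294967296.0
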
/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32_ps&expand=1584)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtudq2ps))]
pub unsafe fn _mm512_mask_cvtepu32_ps(src: __m512, k: __mmask16, a: __m512i) -> __m512 {
    let convert: f32x16 = _mm512_cvtepu32_ps(a).as_f32x16();
    transmute(simd_select_bitmask(k, convert, src.as_f32x16()))
}

/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu32_ps&expand=1585)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtudq2ps))]
pub unsafe fn _mm512_maskz_cvtepu32_ps(k: __mmask16, a: __m512i) -> __m512 {
    let convert: f32x16 = _mm512_cvtepu32_ps(a).as_f32x16();
    let zero: f32x16 = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, convert, zero))
}

/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32_pd&expand=1580)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
pub unsafe fn _mm512_cvtepu32_pd(a: __m256i) -> __m512d {
    let a: u32x8 = a.as_u32x8();
    transmute::<f64x8, _>(simd_cast(a))
}

/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32_pd&expand=1581)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
pub unsafe fn _mm512_mask_cvtepu32_pd(src: __m512d, k: __mmask8, a: __m256i) -> __m512d {
    let convert: f64x8 = _mm512_cvtepu32_pd(a).as_f64x8();
    transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
}

/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu32_pd&expand=1582)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
pub unsafe fn _mm512_maskz_cvtepu32_pd(k: __mmask8, a: __m256i) -> __m512d {
    let convert: f64x8 = _mm512_cvtepu32_pd(a).as_f64x8();
    let zero: f64x8 = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, convert, zero))
}

/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu32_pd&expand=1577)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
pub unsafe fn _mm256_cvtepu32_pd(a: __m128i) -> __m256d {
    let a: u32x4 = a.as_u32x4();
    transmute::<f64x4, _>(simd_cast(a))
}

/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu32_pd&expand=1578)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
pub unsafe fn _mm256_mask_cvtepu32_pd(src: __m256d, k: __mmask8, a: __m128i) -> __m256d {
    let convert: f64x4 = _mm256_cvtepu32_pd(a).as_f64x4();
    transmute(simd_select_bitmask(k, convert, src.as_f64x4()))
}

/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu32_pd&expand=1579)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
pub unsafe fn _mm256_maskz_cvtepu32_pd(k: __mmask8, a: __m128i) -> __m256d {
    let convert: f64x4 = _mm256_cvtepu32_pd(a).as_f64x4();
    let zero: f64x4 = _mm256_setzero_pd().as_f64x4();
    transmute(simd_select_bitmask(k, convert, zero))
}

/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu32_pd&expand=1574)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
pub unsafe fn _mm_cvtepu32_pd(a: __m128i) -> __m128d {
    let a: u32x4 = a.as_u32x4();
    let u64: u32x2 = simd_shuffle!(a, a, [0, 1]);
    transmute::<f64x2, _>(simd_cast(u64))
}

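// Illustrative sketch (not part of the library): only the two low 32-bit lanes of
// the input participate; the upper lanes are ignored.
//
//     let v = _mm_set_epi32(7, 7, 2, 1);            // lanes 3..0
//     let d = unsafe { _mm_cvtepu32_pd(v) };        // [1.0, 2.0]; the 7s are discarded
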
/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu32_pd&expand=1575)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
pub unsafe fn _mm_mask_cvtepu32_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d {
    let convert: f64x2 = _mm_cvtepu32_pd(a).as_f64x2();
    transmute(simd_select_bitmask(k, convert, src.as_f64x2()))
}

/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu32_pd&expand=1576)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
pub unsafe fn _mm_maskz_cvtepu32_pd(k: __mmask8, a: __m128i) -> __m128d {
    let convert: f64x2 = _mm_cvtepu32_pd(a).as_f64x2();
    let zero: f64x2 = _mm_setzero_pd().as_f64x2();
    transmute(simd_select_bitmask(k, convert, zero))
}

/// Performs element-by-element conversion of the lower half of packed 32-bit integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32lo_pd&expand=1464)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
pub unsafe fn _mm512_cvtepi32lo_pd(v2: __m512i) -> __m512d {
    let v2: i32x16 = v2.as_i32x16();
    let v256: i32x8 = simd_shuffle!(v2, v2, [0, 1, 2, 3, 4, 5, 6, 7]);
    transmute::<f64x8, _>(simd_cast(v256))
}

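// Illustrative sketch (not part of the library): only the lower half of the
// 512-bit input (elements 0..=7) is converted; the upper eight lanes are ignored.
//
//     let v = _mm512_set1_epi32(3);
//     let d = unsafe { _mm512_cvtepi32lo_pd(v) };   // eight f64 lanes, each 3.0
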
12476/// Performs element-by-element conversion of the lower half of packed 32-bit integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12477///
12478/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32lo_pd&expand=1465)
12479#[inline]
12480#[target_feature(enable = "avx512f")]
12481#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12482#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12483pub unsafe fn _mm512_mask_cvtepi32lo_pd(src: __m512d, k: __mmask8, v2: __m512i) -> __m512d {
12484 let convert: f64x8 = _mm512_cvtepi32lo_pd(v2).as_f64x8();
12485 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_f64x8()))
12486}
12487
12488/// Performs element-by-element conversion of the lower half of packed 32-bit unsigned integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst.
12489///
12490/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32lo_pd&expand=1586)
12491#[inline]
12492#[target_feature(enable = "avx512f")]
12493#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12494#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12495pub unsafe fn _mm512_cvtepu32lo_pd(v2: __m512i) -> __m512d {
    let v2: u32x16 = v2.as_u32x16();
    let v256: u32x8 = simd_shuffle!(v2, v2, [0, 1, 2, 3, 4, 5, 6, 7]);
    transmute::<f64x8, _>(simd_cast(v256))
}
12500
/// Performs element-by-element conversion of the lower half of packed 32-bit unsigned integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12502///
12503/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32lo_pd&expand=1587)
12504#[inline]
12505#[target_feature(enable = "avx512f")]
12506#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12507#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12508pub unsafe fn _mm512_mask_cvtepu32lo_pd(src: __m512d, k: __mmask8, v2: __m512i) -> __m512d {
    let convert: f64x8 = _mm512_cvtepu32lo_pd(v2).as_f64x8();
    transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
}
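
// Hedged sketch of the "lo" conversions above (illustrative comment only): only the
// lower eight 32-bit lanes of the 512-bit source are converted, so the result holds
// eight doubles regardless of what the upper 256 bits of `v2` contain.
//
//     let v2 = _mm512_set1_epi32(-1);
//     let s = _mm512_cvtepi32lo_pd(v2);   // eight lanes of -1.0 (signed view)
//     let u = _mm512_cvtepu32lo_pd(v2);   // eight lanes of 4294967295.0 (unsigned view)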
12512
12513/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
12514///
12515/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_epi16&expand=1419)
12516#[inline]
12517#[target_feature(enable = "avx512f")]
12518#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12519#[cfg_attr(test, assert_instr(vpmovdw))]
12520pub unsafe fn _mm512_cvtepi32_epi16(a: __m512i) -> __m256i {
    let a: i32x16 = a.as_i32x16();
    transmute::<i16x16, _>(simd_cast(a))
}
12524
12525/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12526///
12527/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_epi16&expand=1420)
12528#[inline]
12529#[target_feature(enable = "avx512f")]
12530#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12531#[cfg_attr(test, assert_instr(vpmovdw))]
12532pub unsafe fn _mm512_mask_cvtepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i {
    let convert: i16x16 = _mm512_cvtepi32_epi16(a).as_i16x16();
    transmute(simd_select_bitmask(k, convert, src.as_i16x16()))
}
12536
12537/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12538///
12539/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_epi16&expand=1421)
12540#[inline]
12541#[target_feature(enable = "avx512f")]
12542#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12543#[cfg_attr(test, assert_instr(vpmovdw))]
12544pub unsafe fn _mm512_maskz_cvtepi32_epi16(k: __mmask16, a: __m512i) -> __m256i {
    let convert: i16x16 = _mm512_cvtepi32_epi16(a).as_i16x16();
    let zero: i16x16 = _mm256_setzero_si256().as_i16x16();
    transmute(simd_select_bitmask(k, convert, zero))
}
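
// Hedged sketch of truncating 32 -> 16 narrowing with masks (illustrative comment
// only): values outside the i16 range simply lose their upper bits, unlike the
// saturating `_mm512_cvtsepi32_epi16` further below.
//
//     let a = _mm512_set1_epi32(0x0001_0002);
//     let t = _mm512_cvtepi32_epi16(a);                            // every lane becomes 0x0002
//     let z = _mm512_maskz_cvtepi32_epi16(0b00000000_00000011, a); // only lanes 0 and 1 kept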
12549
12550/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
12551///
12552/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi32_epi16&expand=1416)
12553#[inline]
12554#[target_feature(enable = "avx512f,avx512vl")]
12555#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12556#[cfg_attr(test, assert_instr(vpmovdw))]
12557pub unsafe fn _mm256_cvtepi32_epi16(a: __m256i) -> __m128i {
    let a: i32x8 = a.as_i32x8();
    transmute::<i16x8, _>(simd_cast(a))
}
12561
12562/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12563///
12564/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_epi16&expand=1417)
12565#[inline]
12566#[target_feature(enable = "avx512f,avx512vl")]
12567#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12568#[cfg_attr(test, assert_instr(vpmovdw))]
12569pub unsafe fn _mm256_mask_cvtepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    let convert: i16x8 = _mm256_cvtepi32_epi16(a).as_i16x8();
    transmute(simd_select_bitmask(k, convert, src.as_i16x8()))
}
12573
12574/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12575///
12576/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_epi16&expand=1418)
12577#[inline]
12578#[target_feature(enable = "avx512f,avx512vl")]
12579#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12580#[cfg_attr(test, assert_instr(vpmovdw))]
12581pub unsafe fn _mm256_maskz_cvtepi32_epi16(k: __mmask8, a: __m256i) -> __m128i {
    let convert: i16x8 = _mm256_cvtepi32_epi16(a).as_i16x8();
    let zero: i16x8 = _mm_setzero_si128().as_i16x8();
    transmute(simd_select_bitmask(k, convert, zero))
}
12586
12587/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
12588///
12589/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi32_epi16&expand=1413)
12590#[inline]
12591#[target_feature(enable = "avx512f,avx512vl")]
12592#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12593#[cfg_attr(test, assert_instr(vpmovdw))]
12594pub unsafe fn _mm_cvtepi32_epi16(a: __m128i) -> __m128i {
    transmute(vpmovdw128(
        a.as_i32x4(),
        _mm_setzero_si128().as_i16x8(),
        0b11111111,
    ))
}
12601
12602/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12603///
12604/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_epi16&expand=1414)
12605#[inline]
12606#[target_feature(enable = "avx512f,avx512vl")]
12607#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12608#[cfg_attr(test, assert_instr(vpmovdw))]
12609pub unsafe fn _mm_mask_cvtepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    transmute(vpmovdw128(a.as_i32x4(), src.as_i16x8(), k))
}
12612
12613/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12614///
12615/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_epi16&expand=1415)
12616#[inline]
12617#[target_feature(enable = "avx512f,avx512vl")]
12618#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12619#[cfg_attr(test, assert_instr(vpmovdw))]
12620pub unsafe fn _mm_maskz_cvtepi32_epi16(k: __mmask8, a: __m128i) -> __m128i {
    transmute(vpmovdw128(a.as_i32x4(), _mm_setzero_si128().as_i16x8(), k))
}
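
// Hedged note on the 128-bit forms above (illustrative comment only): `vpmovdw` with an
// `xmm` source produces only four 16-bit results, so the upper 64 bits of the returned
// `__m128i` are zeroed; the writemask applies to the four live lanes.
//
//     let a = _mm_set_epi32(4, 3, 2, 1);
//     let r = _mm_cvtepi32_epi16(a);   // 16-bit lanes [1, 2, 3, 4, 0, 0, 0, 0]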
12623
12624/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
12625///
12626/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_epi8&expand=1437)
12627#[inline]
12628#[target_feature(enable = "avx512f")]
12629#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12630#[cfg_attr(test, assert_instr(vpmovdb))]
12631pub unsafe fn _mm512_cvtepi32_epi8(a: __m512i) -> __m128i {
    let a: i32x16 = a.as_i32x16();
    transmute::<i8x16, _>(simd_cast(a))
}
12635
12636/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12637///
12638/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_epi8&expand=1438)
12639#[inline]
12640#[target_feature(enable = "avx512f")]
12641#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12642#[cfg_attr(test, assert_instr(vpmovdb))]
12643pub unsafe fn _mm512_mask_cvtepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i {
    let convert: i8x16 = _mm512_cvtepi32_epi8(a).as_i8x16();
    transmute(simd_select_bitmask(k, convert, src.as_i8x16()))
}
12647
12648/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12649///
12650/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_epi8&expand=1439)
12651#[inline]
12652#[target_feature(enable = "avx512f")]
12653#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12654#[cfg_attr(test, assert_instr(vpmovdb))]
12655pub unsafe fn _mm512_maskz_cvtepi32_epi8(k: __mmask16, a: __m512i) -> __m128i {
    let convert: i8x16 = _mm512_cvtepi32_epi8(a).as_i8x16();
    let zero: i8x16 = _mm_setzero_si128().as_i8x16();
    transmute(simd_select_bitmask(k, convert, zero))
}
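
// Hedged sketch of truncating 32 -> 8 narrowing (illustrative comment only): each of
// the sixteen 32-bit lanes keeps only its low byte, e.g. 300 (0x12C) becomes 44 (0x2C).
//
//     let a = _mm512_set1_epi32(300);
//     let r = _mm512_cvtepi32_epi8(a);   // sixteen 8-bit lanes, each 44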
12660
12661/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
12662///
12663/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi32_epi8&expand=1434)
12664#[inline]
12665#[target_feature(enable = "avx512f,avx512vl")]
12666#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12667#[cfg_attr(test, assert_instr(vpmovdb))]
12668pub unsafe fn _mm256_cvtepi32_epi8(a: __m256i) -> __m128i {
    transmute(vpmovdb256(
        a.as_i32x8(),
        _mm_setzero_si128().as_i8x16(),
        0b11111111,
    ))
}
12675
12676/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12677///
12678/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_epi8&expand=1435)
12679#[inline]
12680#[target_feature(enable = "avx512f,avx512vl")]
12681#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12682#[cfg_attr(test, assert_instr(vpmovdb))]
12683pub unsafe fn _mm256_mask_cvtepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    transmute(vpmovdb256(a.as_i32x8(), src.as_i8x16(), k))
}
12686
12687/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12688///
12689/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_epi8&expand=1436)
12690#[inline]
12691#[target_feature(enable = "avx512f,avx512vl")]
12692#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12693#[cfg_attr(test, assert_instr(vpmovdb))]
12694pub unsafe fn _mm256_maskz_cvtepi32_epi8(k: __mmask8, a: __m256i) -> __m128i {
    transmute(vpmovdb256(a.as_i32x8(), _mm_setzero_si128().as_i8x16(), k))
}
12697
12698/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
12699///
12700/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi32_epi8&expand=1431)
12701#[inline]
12702#[target_feature(enable = "avx512f,avx512vl")]
12703#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12704#[cfg_attr(test, assert_instr(vpmovdb))]
12705pub unsafe fn _mm_cvtepi32_epi8(a: __m128i) -> __m128i {
    transmute(vpmovdb128(
        a.as_i32x4(),
        _mm_setzero_si128().as_i8x16(),
        0b11111111,
    ))
}
12712
12713/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12714///
12715/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_epi8&expand=1432)
12716#[inline]
12717#[target_feature(enable = "avx512f,avx512vl")]
12718#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12719#[cfg_attr(test, assert_instr(vpmovdb))]
12720pub unsafe fn _mm_mask_cvtepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    transmute(vpmovdb128(a.as_i32x4(), src.as_i8x16(), k))
}
12723
12724/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12725///
12726/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_epi8&expand=1433)
12727#[inline]
12728#[target_feature(enable = "avx512f,avx512vl")]
12729#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12730#[cfg_attr(test, assert_instr(vpmovdb))]
12731pub unsafe fn _mm_maskz_cvtepi32_epi8(k: __mmask8, a: __m128i) -> __m128i {
    transmute(vpmovdb128(a.as_i32x4(), _mm_setzero_si128().as_i8x16(), k))
}
12734
12735/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst.
12736///
12737/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_epi32&expand=1481)
12738#[inline]
12739#[target_feature(enable = "avx512f")]
12740#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12741#[cfg_attr(test, assert_instr(vpmovqd))]
12742pub unsafe fn _mm512_cvtepi64_epi32(a: __m512i) -> __m256i {
    let a: i64x8 = a.as_i64x8();
    transmute::<i32x8, _>(simd_cast(a))
}
12746
12747/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12748///
12749/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_epi32&expand=1482)
12750#[inline]
12751#[target_feature(enable = "avx512f")]
12752#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12753#[cfg_attr(test, assert_instr(vpmovqd))]
12754pub unsafe fn _mm512_mask_cvtepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i {
    let convert: i32x8 = _mm512_cvtepi64_epi32(a).as_i32x8();
    transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
}
12758
12759/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12760///
12761/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_epi32&expand=1483)
12762#[inline]
12763#[target_feature(enable = "avx512f")]
12764#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12765#[cfg_attr(test, assert_instr(vpmovqd))]
12766pub unsafe fn _mm512_maskz_cvtepi64_epi32(k: __mmask8, a: __m512i) -> __m256i {
    let convert: i32x8 = _mm512_cvtepi64_epi32(a).as_i32x8();
    let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
    transmute(simd_select_bitmask(k, convert, zero))
}
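
// Hedged sketch of truncating 64 -> 32 narrowing (illustrative comment only): only the
// low 32 bits of each quadword survive, so a value such as 0x1_0000_0001 comes out as 1.
//
//     let a = _mm512_set1_epi64(0x1_0000_0001);
//     let r = _mm512_cvtepi64_epi32(a);   // eight 32-bit lanes, each 1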
12771
12772/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst.
12773///
12774/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_epi32&expand=1478)
12775#[inline]
12776#[target_feature(enable = "avx512f,avx512vl")]
12777#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12778#[cfg_attr(test, assert_instr(vpmovqd))]
12779pub unsafe fn _mm256_cvtepi64_epi32(a: __m256i) -> __m128i {
    let a: i64x4 = a.as_i64x4();
    transmute::<i32x4, _>(simd_cast(a))
}
12783
12784/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12785///
12786/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_epi32&expand=1479)
12787#[inline]
12788#[target_feature(enable = "avx512f,avx512vl")]
12789#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12790#[cfg_attr(test, assert_instr(vpmovqd))]
12791pub unsafe fn _mm256_mask_cvtepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    let convert: i32x4 = _mm256_cvtepi64_epi32(a).as_i32x4();
    transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
}
12795
12796/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12797///
12798/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_epi32&expand=1480)
12799#[inline]
12800#[target_feature(enable = "avx512f,avx512vl")]
12801#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12802#[cfg_attr(test, assert_instr(vpmovqd))]
12803pub unsafe fn _mm256_maskz_cvtepi64_epi32(k: __mmask8, a: __m256i) -> __m128i {
    let convert: i32x4 = _mm256_cvtepi64_epi32(a).as_i32x4();
    let zero: i32x4 = _mm_setzero_si128().as_i32x4();
    transmute(simd_select_bitmask(k, convert, zero))
}
12808
12809/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst.
12810///
12811/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_epi32&expand=1475)
12812#[inline]
12813#[target_feature(enable = "avx512f,avx512vl")]
12814#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12815#[cfg_attr(test, assert_instr(vpmovqd))]
12816pub unsafe fn _mm_cvtepi64_epi32(a: __m128i) -> __m128i {
    transmute(vpmovqd128(
        a.as_i64x2(),
        _mm_setzero_si128().as_i32x4(),
        0b11111111,
    ))
}
12823
12824/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12825///
12826/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_epi32&expand=1476)
12827#[inline]
12828#[target_feature(enable = "avx512f,avx512vl")]
12829#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12830#[cfg_attr(test, assert_instr(vpmovqd))]
12831pub unsafe fn _mm_mask_cvtepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    transmute(vpmovqd128(a.as_i64x2(), src.as_i32x4(), k))
}
12834
12835/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12836///
12837/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_epi32&expand=1477)
12838#[inline]
12839#[target_feature(enable = "avx512f,avx512vl")]
12840#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12841#[cfg_attr(test, assert_instr(vpmovqd))]
12842pub unsafe fn _mm_maskz_cvtepi64_epi32(k: __mmask8, a: __m128i) -> __m128i {
    transmute(vpmovqd128(a.as_i64x2(), _mm_setzero_si128().as_i32x4(), k))
}
12845
12846/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
12847///
12848/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_epi16&expand=1472)
12849#[inline]
12850#[target_feature(enable = "avx512f")]
12851#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12852#[cfg_attr(test, assert_instr(vpmovqw))]
12853pub unsafe fn _mm512_cvtepi64_epi16(a: __m512i) -> __m128i {
    let a: i64x8 = a.as_i64x8();
    transmute::<i16x8, _>(simd_cast(a))
}
12857
12858/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12859///
12860/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_epi16&expand=1473)
12861#[inline]
12862#[target_feature(enable = "avx512f")]
12863#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12864#[cfg_attr(test, assert_instr(vpmovqw))]
12865pub unsafe fn _mm512_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
    let convert: i16x8 = _mm512_cvtepi64_epi16(a).as_i16x8();
    transmute(simd_select_bitmask(k, convert, src.as_i16x8()))
}
12869
12870/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12871///
12872/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_epi16&expand=1474)
12873#[inline]
12874#[target_feature(enable = "avx512f")]
12875#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12876#[cfg_attr(test, assert_instr(vpmovqw))]
12877pub unsafe fn _mm512_maskz_cvtepi64_epi16(k: __mmask8, a: __m512i) -> __m128i {
    let convert: i16x8 = _mm512_cvtepi64_epi16(a).as_i16x8();
    let zero: i16x8 = _mm_setzero_si128().as_i16x8();
    transmute(simd_select_bitmask(k, convert, zero))
}
12882
12883/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
12884///
12885/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_epi16&expand=1469)
12886#[inline]
12887#[target_feature(enable = "avx512f,avx512vl")]
12888#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12889#[cfg_attr(test, assert_instr(vpmovqw))]
12890pub unsafe fn _mm256_cvtepi64_epi16(a: __m256i) -> __m128i {
    transmute(vpmovqw256(
        a.as_i64x4(),
        _mm_setzero_si128().as_i16x8(),
        0b11111111,
    ))
}
12897
12898/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12899///
12900/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_epi16&expand=1470)
12901#[inline]
12902#[target_feature(enable = "avx512f,avx512vl")]
12903#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12904#[cfg_attr(test, assert_instr(vpmovqw))]
12905pub unsafe fn _mm256_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    transmute(vpmovqw256(a.as_i64x4(), src.as_i16x8(), k))
}
12908
12909/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12910///
12911/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_epi16&expand=1471)
12912#[inline]
12913#[target_feature(enable = "avx512f,avx512vl")]
12914#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12915#[cfg_attr(test, assert_instr(vpmovqw))]
12916pub unsafe fn _mm256_maskz_cvtepi64_epi16(k: __mmask8, a: __m256i) -> __m128i {
    transmute(vpmovqw256(a.as_i64x4(), _mm_setzero_si128().as_i16x8(), k))
}
12919
12920/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
12921///
12922/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_epi16&expand=1466)
12923#[inline]
12924#[target_feature(enable = "avx512f,avx512vl")]
12925#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12926#[cfg_attr(test, assert_instr(vpmovqw))]
12927pub unsafe fn _mm_cvtepi64_epi16(a: __m128i) -> __m128i {
    transmute(vpmovqw128(
        a.as_i64x2(),
        _mm_setzero_si128().as_i16x8(),
        0b11111111,
    ))
}
12934
12935/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12936///
12937/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_epi16&expand=1467)
12938#[inline]
12939#[target_feature(enable = "avx512f,avx512vl")]
12940#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12941#[cfg_attr(test, assert_instr(vpmovqw))]
12942pub unsafe fn _mm_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    transmute(vpmovqw128(a.as_i64x2(), src.as_i16x8(), k))
}
12945
12946/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12947///
12948/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_epi16&expand=1468)
12949#[inline]
12950#[target_feature(enable = "avx512f,avx512vl")]
12951#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12952#[cfg_attr(test, assert_instr(vpmovqw))]
12953pub unsafe fn _mm_maskz_cvtepi64_epi16(k: __mmask8, a: __m128i) -> __m128i {
    transmute(vpmovqw128(a.as_i64x2(), _mm_setzero_si128().as_i16x8(), k))
}
12956
12957/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
12958///
12959/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_epi8&expand=1490)
12960#[inline]
12961#[target_feature(enable = "avx512f")]
12962#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12963#[cfg_attr(test, assert_instr(vpmovqb))]
12964pub unsafe fn _mm512_cvtepi64_epi8(a: __m512i) -> __m128i {
    transmute(vpmovqb(
        a.as_i64x8(),
        _mm_setzero_si128().as_i8x16(),
        0b11111111,
    ))
}
12971
12972/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12973///
12974/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_epi8&expand=1491)
12975#[inline]
12976#[target_feature(enable = "avx512f")]
12977#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12978#[cfg_attr(test, assert_instr(vpmovqb))]
12979pub unsafe fn _mm512_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
    transmute(vpmovqb(a.as_i64x8(), src.as_i8x16(), k))
}
12982
12983/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12984///
12985/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_epi8&expand=1492)
12986#[inline]
12987#[target_feature(enable = "avx512f")]
12988#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12989#[cfg_attr(test, assert_instr(vpmovqb))]
12990pub unsafe fn _mm512_maskz_cvtepi64_epi8(k: __mmask8, a: __m512i) -> __m128i {
    transmute(vpmovqb(a.as_i64x8(), _mm_setzero_si128().as_i8x16(), k))
}
12993
12994/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
12995///
12996/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_epi8&expand=1487)
12997#[inline]
12998#[target_feature(enable = "avx512f,avx512vl")]
12999#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13000#[cfg_attr(test, assert_instr(vpmovqb))]
13001pub unsafe fn _mm256_cvtepi64_epi8(a: __m256i) -> __m128i {
    transmute(vpmovqb256(
        a.as_i64x4(),
        _mm_setzero_si128().as_i8x16(),
        0b11111111,
    ))
}
13008
13009/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13010///
13011/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_epi8&expand=1488)
13012#[inline]
13013#[target_feature(enable = "avx512f,avx512vl")]
13014#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13015#[cfg_attr(test, assert_instr(vpmovqb))]
13016pub unsafe fn _mm256_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    transmute(vpmovqb256(a.as_i64x4(), src.as_i8x16(), k))
}
13019
13020/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13021///
13022/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_epi8&expand=1489)
13023#[inline]
13024#[target_feature(enable = "avx512f,avx512vl")]
13025#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13026#[cfg_attr(test, assert_instr(vpmovqb))]
13027pub unsafe fn _mm256_maskz_cvtepi64_epi8(k: __mmask8, a: __m256i) -> __m128i {
    transmute(vpmovqb256(a.as_i64x4(), _mm_setzero_si128().as_i8x16(), k))
}
13030
13031/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
13032///
13033/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_epi8&expand=1484)
13034#[inline]
13035#[target_feature(enable = "avx512f,avx512vl")]
13036#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13037#[cfg_attr(test, assert_instr(vpmovqb))]
13038pub unsafe fn _mm_cvtepi64_epi8(a: __m128i) -> __m128i {
    transmute(vpmovqb128(
        a.as_i64x2(),
        _mm_setzero_si128().as_i8x16(),
        0b11111111,
    ))
}
13045
13046/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13047///
13048/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_epi8&expand=1485)
13049#[inline]
13050#[target_feature(enable = "avx512f,avx512vl")]
13051#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13052#[cfg_attr(test, assert_instr(vpmovqb))]
13053pub unsafe fn _mm_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    transmute(vpmovqb128(a.as_i64x2(), src.as_i8x16(), k))
}
13056
13057/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13058///
13059/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_epi8&expand=1486)
13060#[inline]
13061#[target_feature(enable = "avx512f,avx512vl")]
13062#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13063#[cfg_attr(test, assert_instr(vpmovqb))]
13064pub unsafe fn _mm_maskz_cvtepi64_epi8(k: __mmask8, a: __m128i) -> __m128i {
    transmute(vpmovqb128(a.as_i64x2(), _mm_setzero_si128().as_i8x16(), k))
}
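
// Hedged note on the quadword -> byte family above (illustrative comment only): the
// narrow results occupy only the low bytes of the returned `__m128i` (eight bytes for
// a 512-bit source, four for 256-bit, two for 128-bit); the remaining bytes are zero.
//
//     let a = _mm512_set1_epi64(-2);
//     let r = _mm512_cvtepi64_epi8(a);   // low eight bytes are 0xFE, upper eight are 0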
13067
13068/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13069///
13070/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi32_epi16&expand=1819)
13071#[inline]
13072#[target_feature(enable = "avx512f")]
13073#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13074#[cfg_attr(test, assert_instr(vpmovsdw))]
13075pub unsafe fn _mm512_cvtsepi32_epi16(a: __m512i) -> __m256i {
    transmute(vpmovsdw(
        a.as_i32x16(),
        _mm256_setzero_si256().as_i16x16(),
        0b11111111_11111111,
    ))
}
13082
13083/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13084///
13085/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi32_epi16&expand=1820)
13086#[inline]
13087#[target_feature(enable = "avx512f")]
13088#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13089#[cfg_attr(test, assert_instr(vpmovsdw))]
13090pub unsafe fn _mm512_mask_cvtsepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i {
    transmute(vpmovsdw(a.as_i32x16(), src.as_i16x16(), k))
}
13093
/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi32_epi16&expand=1821)
13097#[inline]
13098#[target_feature(enable = "avx512f")]
13099#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13100#[cfg_attr(test, assert_instr(vpmovsdw))]
13101pub unsafe fn _mm512_maskz_cvtsepi32_epi16(k: __mmask16, a: __m512i) -> __m256i {
    transmute(vpmovsdw(
        a.as_i32x16(),
        _mm256_setzero_si256().as_i16x16(),
        k,
    ))
}
13108
13109/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13110///
13111/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi32_epi16&expand=1816)
13112#[inline]
13113#[target_feature(enable = "avx512f,avx512vl")]
13114#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13115#[cfg_attr(test, assert_instr(vpmovsdw))]
13116pub unsafe fn _mm256_cvtsepi32_epi16(a: __m256i) -> __m128i {
    transmute(vpmovsdw256(
        a.as_i32x8(),
        _mm_setzero_si128().as_i16x8(),
        0b11111111,
    ))
}
13123
13124/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13125///
13126/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi32_epi16&expand=1817)
13127#[inline]
13128#[target_feature(enable = "avx512f,avx512vl")]
13129#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13130#[cfg_attr(test, assert_instr(vpmovsdw))]
13131pub unsafe fn _mm256_mask_cvtsepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    transmute(vpmovsdw256(a.as_i32x8(), src.as_i16x8(), k))
}
13134
/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13136///
13137/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi32_epi16&expand=1818)
13138#[inline]
13139#[target_feature(enable = "avx512f,avx512vl")]
13140#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13141#[cfg_attr(test, assert_instr(vpmovsdw))]
13142pub unsafe fn _mm256_maskz_cvtsepi32_epi16(k: __mmask8, a: __m256i) -> __m128i {
    transmute(vpmovsdw256(a.as_i32x8(), _mm_setzero_si128().as_i16x8(), k))
}
13145
13146/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13147///
13148/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi32_epi16&expand=1813)
13149#[inline]
13150#[target_feature(enable = "avx512f,avx512vl")]
13151#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13152#[cfg_attr(test, assert_instr(vpmovsdw))]
13153pub unsafe fn _mm_cvtsepi32_epi16(a: __m128i) -> __m128i {
    transmute(vpmovsdw128(
        a.as_i32x4(),
        _mm_setzero_si128().as_i16x8(),
        0b11111111,
    ))
}
13160
13161/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13162///
13163/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi32_epi16&expand=1814)
13164#[inline]
13165#[target_feature(enable = "avx512f,avx512vl")]
13166#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13167#[cfg_attr(test, assert_instr(vpmovsdw))]
13168pub unsafe fn _mm_mask_cvtsepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    transmute(vpmovsdw128(a.as_i32x4(), src.as_i16x8(), k))
}
13171
/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13173///
13174/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi32_epi16&expand=1815)
13175#[inline]
13176#[target_feature(enable = "avx512f,avx512vl")]
13177#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13178#[cfg_attr(test, assert_instr(vpmovsdw))]
13179pub unsafe fn _mm_maskz_cvtsepi32_epi16(k: __mmask8, a: __m128i) -> __m128i {
    transmute(vpmovsdw128(a.as_i32x4(), _mm_setzero_si128().as_i16x8(), k))
}
13182
13183/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
13184///
13185/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi32_epi8&expand=1828)
13186#[inline]
13187#[target_feature(enable = "avx512f")]
13188#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13189#[cfg_attr(test, assert_instr(vpmovsdb))]
13190pub unsafe fn _mm512_cvtsepi32_epi8(a: __m512i) -> __m128i {
    transmute(vpmovsdb(
        a.as_i32x16(),
        _mm_setzero_si128().as_i8x16(),
        0b11111111_11111111,
    ))
}
13197
13198/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13199///
13200/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi32_epi8&expand=1829)
13201#[inline]
13202#[target_feature(enable = "avx512f")]
13203#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13204#[cfg_attr(test, assert_instr(vpmovsdb))]
13205pub unsafe fn _mm512_mask_cvtsepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i {
    transmute(vpmovsdb(a.as_i32x16(), src.as_i8x16(), k))
}
13208
13209/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13210///
13211/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi32_epi8&expand=1830)
13212#[inline]
13213#[target_feature(enable = "avx512f")]
13214#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13215#[cfg_attr(test, assert_instr(vpmovsdb))]
13216pub unsafe fn _mm512_maskz_cvtsepi32_epi8(k: __mmask16, a: __m512i) -> __m128i {
    transmute(vpmovsdb(a.as_i32x16(), _mm_setzero_si128().as_i8x16(), k))
}
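
// Hedged sketch contrasting saturation with truncation (illustrative comment only):
// the `cvtsepi32_epi8` family clamps to the i8 range instead of dropping high bits.
//
//     let a = _mm512_set1_epi32(300);
//     let trunc = _mm512_cvtepi32_epi8(a);   // each byte is 44 (300 & 0xFF)
//     let sat = _mm512_cvtsepi32_epi8(a);    // each byte is 127 (i8::MAX)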
13219
13220/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
13221///
13222/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi32_epi8&expand=1825)
13223#[inline]
13224#[target_feature(enable = "avx512f,avx512vl")]
13225#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13226#[cfg_attr(test, assert_instr(vpmovsdb))]
13227pub unsafe fn _mm256_cvtsepi32_epi8(a: __m256i) -> __m128i {
    transmute(vpmovsdb256(
        a.as_i32x8(),
        _mm_setzero_si128().as_i8x16(),
        0b11111111,
    ))
}
13234
13235/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13236///
13237/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi32_epi8&expand=1826)
13238#[inline]
13239#[target_feature(enable = "avx512f,avx512vl")]
13240#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13241#[cfg_attr(test, assert_instr(vpmovsdb))]
13242pub unsafe fn _mm256_mask_cvtsepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    transmute(vpmovsdb256(a.as_i32x8(), src.as_i8x16(), k))
}
13245
13246/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13247///
13248/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi32_epi8&expand=1827)
13249#[inline]
13250#[target_feature(enable = "avx512f,avx512vl")]
13251#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13252#[cfg_attr(test, assert_instr(vpmovsdb))]
13253pub unsafe fn _mm256_maskz_cvtsepi32_epi8(k: __mmask8, a: __m256i) -> __m128i {
    transmute(vpmovsdb256(a.as_i32x8(), _mm_setzero_si128().as_i8x16(), k))
}
13256
13257/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
13258///
13259/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi32_epi8&expand=1822)
13260#[inline]
13261#[target_feature(enable = "avx512f,avx512vl")]
13262#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13263#[cfg_attr(test, assert_instr(vpmovsdb))]
13264pub unsafe fn _mm_cvtsepi32_epi8(a: __m128i) -> __m128i {
    transmute(vpmovsdb128(
        a.as_i32x4(),
        _mm_setzero_si128().as_i8x16(),
        0b11111111,
    ))
}
13271
13272/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13273///
13274/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi32_epi8&expand=1823)
13275#[inline]
13276#[target_feature(enable = "avx512f,avx512vl")]
13277#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13278#[cfg_attr(test, assert_instr(vpmovsdb))]
13279pub unsafe fn _mm_mask_cvtsepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    transmute(vpmovsdb128(a.as_i32x4(), src.as_i8x16(), k))
}
13282
13283/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13284///
13285/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi32_epi8&expand=1824)
13286#[inline]
13287#[target_feature(enable = "avx512f,avx512vl")]
13288#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13289#[cfg_attr(test, assert_instr(vpmovsdb))]
13290pub unsafe fn _mm_maskz_cvtsepi32_epi8(k: __mmask8, a: __m128i) -> __m128i {
    transmute(vpmovsdb128(a.as_i32x4(), _mm_setzero_si128().as_i8x16(), k))
}
13293
13294/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst.
13295///
13296/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi64_epi32&expand=1852)
13297#[inline]
13298#[target_feature(enable = "avx512f")]
13299#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13300#[cfg_attr(test, assert_instr(vpmovsqd))]
13301pub unsafe fn _mm512_cvtsepi64_epi32(a: __m512i) -> __m256i {
    transmute(vpmovsqd(
        a.as_i64x8(),
        _mm256_setzero_si256().as_i32x8(),
        0b11111111,
    ))
}
13308
13309/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13310///
13311/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_epi32&expand=1853)
13312#[inline]
13313#[target_feature(enable = "avx512f")]
13314#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13315#[cfg_attr(test, assert_instr(vpmovsqd))]
13316pub unsafe fn _mm512_mask_cvtsepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i {
    transmute(vpmovsqd(a.as_i64x8(), src.as_i32x8(), k))
}
13319
13320/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13321///
13322/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi64_epi32&expand=1854)
13323#[inline]
13324#[target_feature(enable = "avx512f")]
13325#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13326#[cfg_attr(test, assert_instr(vpmovsqd))]
13327pub unsafe fn _mm512_maskz_cvtsepi64_epi32(k: __mmask8, a: __m512i) -> __m256i {
    transmute(vpmovsqd(a.as_i64x8(), _mm256_setzero_si256().as_i32x8(), k))
}
13330
13331/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst.
13332///
13333/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi64_epi32&expand=1849)
13334#[inline]
13335#[target_feature(enable = "avx512f,avx512vl")]
13336#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13337#[cfg_attr(test, assert_instr(vpmovsqd))]
13338pub unsafe fn _mm256_cvtsepi64_epi32(a: __m256i) -> __m128i {
    transmute(vpmovsqd256(
        a.as_i64x4(),
        _mm_setzero_si128().as_i32x4(),
        0b11111111,
    ))
}
13345
13346/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13347///
13348/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_epi32&expand=1850)
13349#[inline]
13350#[target_feature(enable = "avx512f,avx512vl")]
13351#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13352#[cfg_attr(test, assert_instr(vpmovsqd))]
13353pub unsafe fn _mm256_mask_cvtsepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    transmute(vpmovsqd256(a.as_i64x4(), src.as_i32x4(), k))
}
13356
13357/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13358///
13359/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi64_epi32&expand=1851)
13360#[inline]
13361#[target_feature(enable = "avx512f,avx512vl")]
13362#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13363#[cfg_attr(test, assert_instr(vpmovsqd))]
13364pub unsafe fn _mm256_maskz_cvtsepi64_epi32(k: __mmask8, a: __m256i) -> __m128i {
    transmute(vpmovsqd256(a.as_i64x4(), _mm_setzero_si128().as_i32x4(), k))
}
13367
13368/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst.
13369///
13370/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi64_epi32&expand=1846)
13371#[inline]
13372#[target_feature(enable = "avx512f,avx512vl")]
13373#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13374#[cfg_attr(test, assert_instr(vpmovsqd))]
13375pub unsafe fn _mm_cvtsepi64_epi32(a: __m128i) -> __m128i {
    transmute(vpmovsqd128(
        a.as_i64x2(),
        _mm_setzero_si128().as_i32x4(),
        0b11111111,
    ))
}
13382
13383/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13384///
13385/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_epi32&expand=1847)
13386#[inline]
13387#[target_feature(enable = "avx512f,avx512vl")]
13388#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13389#[cfg_attr(test, assert_instr(vpmovsqd))]
13390pub unsafe fn _mm_mask_cvtsepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    transmute(vpmovsqd128(a.as_i64x2(), src.as_i32x4(), k))
}
13393
13394/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13395///
13396/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi64_epi32&expand=1848)
13397#[inline]
13398#[target_feature(enable = "avx512f,avx512vl")]
13399#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13400#[cfg_attr(test, assert_instr(vpmovsqd))]
13401pub unsafe fn _mm_maskz_cvtsepi64_epi32(k: __mmask8, a: __m128i) -> __m128i {
    transmute(vpmovsqd128(a.as_i64x2(), _mm_setzero_si128().as_i32x4(), k))
}
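
// Hedged sketch of signed 64 -> 32 saturation (illustrative comment only): values
// outside the i32 range clamp to i32::MIN / i32::MAX rather than wrapping.
//
//     let a = _mm512_set1_epi64(i64::MAX);
//     let r = _mm512_cvtsepi64_epi32(a);   // eight lanes of i32::MAX (2147483647)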
13404
13405/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13406///
13407/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi64_epi16&expand=1843)
13408#[inline]
13409#[target_feature(enable = "avx512f")]
13410#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13411#[cfg_attr(test, assert_instr(vpmovsqw))]
13412pub unsafe fn _mm512_cvtsepi64_epi16(a: __m512i) -> __m128i {
    transmute(vpmovsqw(
        a.as_i64x8(),
        _mm_setzero_si128().as_i16x8(),
        0b11111111,
    ))
}
13419
13420/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13421///
13422/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_epi16&expand=1844)
13423#[inline]
13424#[target_feature(enable = "avx512f")]
13425#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13426#[cfg_attr(test, assert_instr(vpmovsqw))]
13427pub unsafe fn _mm512_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
    transmute(vpmovsqw(a.as_i64x8(), src.as_i16x8(), k))
}
13430
13431/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13432///
13433/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi64_epi16&expand=1845)
13434#[inline]
13435#[target_feature(enable = "avx512f")]
13436#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13437#[cfg_attr(test, assert_instr(vpmovsqw))]
13438pub unsafe fn _mm512_maskz_cvtsepi64_epi16(k: __mmask8, a: __m512i) -> __m128i {
    transmute(vpmovsqw(a.as_i64x8(), _mm_setzero_si128().as_i16x8(), k))
}
13441
13442/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13443///
13444/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi64_epi16&expand=1840)
13445#[inline]
13446#[target_feature(enable = "avx512f,avx512vl")]
13447#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13448#[cfg_attr(test, assert_instr(vpmovsqw))]
13449pub unsafe fn _mm256_cvtsepi64_epi16(a: __m256i) -> __m128i {
    transmute(vpmovsqw256(
        a.as_i64x4(),
        _mm_setzero_si128().as_i16x8(),
        0b11111111,
    ))
}
13456
13457/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13458///
13459/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_epi16&expand=1841)
13460#[inline]
13461#[target_feature(enable = "avx512f,avx512vl")]
13462#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13463#[cfg_attr(test, assert_instr(vpmovsqw))]
13464pub unsafe fn _mm256_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    transmute(vpmovsqw256(a.as_i64x4(), src.as_i16x8(), k))
}
13467
13468/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13469///
13470/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi64_epi16&expand=1842)
13471#[inline]
13472#[target_feature(enable = "avx512f,avx512vl")]
13473#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13474#[cfg_attr(test, assert_instr(vpmovsqw))]
13475pub unsafe fn _mm256_maskz_cvtsepi64_epi16(k: __mmask8, a: __m256i) -> __m128i {
13476 transmute(src:vpmovsqw256(a:a.as_i64x4(), src:_mm_setzero_si128().as_i16x8(), mask:k))
13477}
13478
13479/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13480///
13481/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi64_epi16&expand=1837)
13482#[inline]
13483#[target_feature(enable = "avx512f,avx512vl")]
13484#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13485#[cfg_attr(test, assert_instr(vpmovsqw))]
13486pub unsafe fn _mm_cvtsepi64_epi16(a: __m128i) -> __m128i {
13487 transmute(src:vpmovsqw128(
13488 a:a.as_i64x2(),
13489 src:_mm_setzero_si128().as_i16x8(),
13490 mask:0b11111111,
13491 ))
13492}
13493
13494/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13495///
13496/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_epi16&expand=1838)
13497#[inline]
13498#[target_feature(enable = "avx512f,avx512vl")]
13499#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13500#[cfg_attr(test, assert_instr(vpmovsqw))]
13501pub unsafe fn _mm_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13502 transmute(src:vpmovsqw128(a:a.as_i64x2(), src:src.as_i16x8(), mask:k))
13503}
13504
13505/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13506///
13507/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi64_epi16&expand=1839)
13508#[inline]
13509#[target_feature(enable = "avx512f,avx512vl")]
13510#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13511#[cfg_attr(test, assert_instr(vpmovsqw))]
13512pub unsafe fn _mm_maskz_cvtsepi64_epi16(k: __mmask8, a: __m128i) -> __m128i {
13513 transmute(src:vpmovsqw128(a:a.as_i64x2(), src:_mm_setzero_si128().as_i16x8(), mask:k))
13514}
13515
13516/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
13517///
13518/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi64_epi8&expand=1861)
13519#[inline]
13520#[target_feature(enable = "avx512f")]
13521#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13522#[cfg_attr(test, assert_instr(vpmovsqb))]
13523pub unsafe fn _mm512_cvtsepi64_epi8(a: __m512i) -> __m128i {
13524 transmute(src:vpmovsqb(
13525 a:a.as_i64x8(),
13526 src:_mm_setzero_si128().as_i8x16(),
13527 mask:0b11111111,
13528 ))
13529}
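
// Illustrative sketch (hypothetical helper, not from the upstream source): the eight
// 64-bit lanes become eight saturated bytes in the low half of the 128-bit result,
// while the upper eight bytes are zero.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_cvtsepi64_epi8() -> __m128i {
    // 300 exceeds i8::MAX, so each of the low eight bytes saturates to 127.
    let a = _mm512_set1_epi64(300);
    _mm512_cvtsepi64_epi8(a)
}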

/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_epi8&expand=1862)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub unsafe fn _mm512_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
    transmute(vpmovsqb(a.as_i64x8(), src.as_i8x16(), k))
}

/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi64_epi8&expand=1863)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub unsafe fn _mm512_maskz_cvtsepi64_epi8(k: __mmask8, a: __m512i) -> __m128i {
    transmute(vpmovsqb(a.as_i64x8(), _mm_setzero_si128().as_i8x16(), k))
}

/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi64_epi8&expand=1858)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub unsafe fn _mm256_cvtsepi64_epi8(a: __m256i) -> __m128i {
    transmute(vpmovsqb256(
        a.as_i64x4(),
        _mm_setzero_si128().as_i8x16(),
        0b11111111,
    ))
}

/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_epi8&expand=1859)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub unsafe fn _mm256_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    transmute(vpmovsqb256(a.as_i64x4(), src.as_i8x16(), k))
}

/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi64_epi8&expand=1860)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub unsafe fn _mm256_maskz_cvtsepi64_epi8(k: __mmask8, a: __m256i) -> __m128i {
    transmute(vpmovsqb256(a.as_i64x4(), _mm_setzero_si128().as_i8x16(), k))
}

/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi64_epi8&expand=1855)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub unsafe fn _mm_cvtsepi64_epi8(a: __m128i) -> __m128i {
    transmute(vpmovsqb128(
        a.as_i64x2(),
        _mm_setzero_si128().as_i8x16(),
        0b11111111,
    ))
}

/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_epi8&expand=1856)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub unsafe fn _mm_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    transmute(vpmovsqb128(a.as_i64x2(), src.as_i8x16(), k))
}

/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi64_epi8&expand=1857)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub unsafe fn _mm_maskz_cvtsepi64_epi8(k: __mmask8, a: __m128i) -> __m128i {
    transmute(vpmovsqb128(a.as_i64x2(), _mm_setzero_si128().as_i8x16(), k))
}

/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi32_epi16&expand=2054)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovusdw))]
pub unsafe fn _mm512_cvtusepi32_epi16(a: __m512i) -> __m256i {
    transmute(vpmovusdw(
        a.as_u32x16(),
        _mm256_setzero_si256().as_u16x16(),
        0b11111111_11111111,
    ))
}
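
// Illustrative sketch (hypothetical helper, not part of the upstream source): unsigned
// saturation clamps anything above u16::MAX to 0xFFFF instead of wrapping.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_cvtusepi32_epi16() -> __m256i {
    // 0x1_0000 is one past u16::MAX, so every 16-bit lane of the result is 0xFFFF.
    let a = _mm512_set1_epi32(0x1_0000);
    _mm512_cvtusepi32_epi16(a)
}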

/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi32_epi16&expand=2055)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovusdw))]
pub unsafe fn _mm512_mask_cvtusepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i {
    transmute(vpmovusdw(a.as_u32x16(), src.as_u16x16(), k))
}

/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi32_epi16&expand=2056)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovusdw))]
pub unsafe fn _mm512_maskz_cvtusepi32_epi16(k: __mmask16, a: __m512i) -> __m256i {
    transmute(vpmovusdw(
        a.as_u32x16(),
        _mm256_setzero_si256().as_u16x16(),
        k,
    ))
}
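
// Usage sketch (assumption, hypothetical helper name): with a zeromask, lanes whose
// mask bit is clear become zero instead of being taken from a `src` vector.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_maskz_cvtusepi32_epi16() -> __m256i {
    let a = _mm512_set1_epi32(42);
    // Only the even-numbered lanes are kept; the odd-numbered result lanes are zero.
    _mm512_maskz_cvtusepi32_epi16(0b01010101_01010101, a)
}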

/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi32_epi16&expand=2051)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovusdw))]
pub unsafe fn _mm256_cvtusepi32_epi16(a: __m256i) -> __m128i {
    transmute(vpmovusdw256(
        a.as_u32x8(),
        _mm_setzero_si128().as_u16x8(),
        0b11111111,
    ))
}

/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi32_epi16&expand=2052)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovusdw))]
pub unsafe fn _mm256_mask_cvtusepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    transmute(vpmovusdw256(a.as_u32x8(), src.as_u16x8(), k))
}

/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi32_epi16&expand=2053)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovusdw))]
pub unsafe fn _mm256_maskz_cvtusepi32_epi16(k: __mmask8, a: __m256i) -> __m128i {
    transmute(vpmovusdw256(
        a.as_u32x8(),
        _mm_setzero_si128().as_u16x8(),
        k,
    ))
}

/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi32_epi16&expand=2048)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovusdw))]
pub unsafe fn _mm_cvtusepi32_epi16(a: __m128i) -> __m128i {
    transmute(vpmovusdw128(
        a.as_u32x4(),
        _mm_setzero_si128().as_u16x8(),
        0b11111111,
    ))
}

/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi32_epi16&expand=2049)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovusdw))]
pub unsafe fn _mm_mask_cvtusepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    transmute(vpmovusdw128(a.as_u32x4(), src.as_u16x8(), k))
}

/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi32_epi16&expand=2050)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovusdw))]
pub unsafe fn _mm_maskz_cvtusepi32_epi16(k: __mmask8, a: __m128i) -> __m128i {
    transmute(vpmovusdw128(
        a.as_u32x4(),
        _mm_setzero_si128().as_u16x8(),
        k,
    ))
}

/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi32_epi8&expand=2063)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovusdb))]
pub unsafe fn _mm512_cvtusepi32_epi8(a: __m512i) -> __m128i {
    transmute(vpmovusdb(
        a.as_u32x16(),
        _mm_setzero_si128().as_u8x16(),
        0b11111111_11111111,
    ))
}
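
// Illustrative sketch (hypothetical helper): all sixteen 32-bit inputs fill the sixteen
// bytes of the 128-bit result, each clamped to u8::MAX when it does not fit.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_cvtusepi32_epi8() -> __m128i {
    // 1000 exceeds u8::MAX, so every byte of the result is 0xFF.
    let a = _mm512_set1_epi32(1000);
    _mm512_cvtusepi32_epi8(a)
}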

/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi32_epi8&expand=2064)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovusdb))]
pub unsafe fn _mm512_mask_cvtusepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i {
    transmute(vpmovusdb(a.as_u32x16(), src.as_u8x16(), k))
}

/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi32_epi8&expand=2065)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovusdb))]
pub unsafe fn _mm512_maskz_cvtusepi32_epi8(k: __mmask16, a: __m512i) -> __m128i {
    transmute(vpmovusdb(a.as_u32x16(), _mm_setzero_si128().as_u8x16(), k))
}

/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi32_epi8&expand=2060)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovusdb))]
pub unsafe fn _mm256_cvtusepi32_epi8(a: __m256i) -> __m128i {
    transmute(vpmovusdb256(
        a.as_u32x8(),
        _mm_setzero_si128().as_u8x16(),
        0b11111111,
    ))
}

/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi32_epi8&expand=2061)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovusdb))]
pub unsafe fn _mm256_mask_cvtusepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    transmute(vpmovusdb256(a.as_u32x8(), src.as_u8x16(), k))
}

/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi32_epi8&expand=2062)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovusdb))]
pub unsafe fn _mm256_maskz_cvtusepi32_epi8(k: __mmask8, a: __m256i) -> __m128i {
    transmute(vpmovusdb256(
        a.as_u32x8(),
        _mm_setzero_si128().as_u8x16(),
        k,
    ))
}

/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi32_epi8&expand=2057)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovusdb))]
pub unsafe fn _mm_cvtusepi32_epi8(a: __m128i) -> __m128i {
    transmute(vpmovusdb128(
        a.as_u32x4(),
        _mm_setzero_si128().as_u8x16(),
        0b11111111,
    ))
}

/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi32_epi8&expand=2058)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovusdb))]
pub unsafe fn _mm_mask_cvtusepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    transmute(vpmovusdb128(a.as_u32x4(), src.as_u8x16(), k))
}

/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi32_epi8&expand=2059)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovusdb))]
pub unsafe fn _mm_maskz_cvtusepi32_epi8(k: __mmask8, a: __m128i) -> __m128i {
    transmute(vpmovusdb128(
        a.as_u32x4(),
        _mm_setzero_si128().as_u8x16(),
        k,
    ))
}

/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi64_epi32&expand=2087)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovusqd))]
pub unsafe fn _mm512_cvtusepi64_epi32(a: __m512i) -> __m256i {
    transmute(vpmovusqd(
        a.as_u64x8(),
        _mm256_setzero_si256().as_u32x8(),
        0b11111111,
    ))
}
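
// Illustrative sketch (hypothetical helper, not part of the upstream source): eight
// unsigned 64-bit lanes collapse into a 256-bit vector of 32-bit lanes, clamping at
// u32::MAX.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_cvtusepi64_epi32() -> __m256i {
    // u32::MAX + 1 no longer fits in 32 bits, so each result lane is u32::MAX.
    let a = _mm512_set1_epi64(u32::MAX as i64 + 1);
    _mm512_cvtusepi64_epi32(a)
}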

/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_epi32&expand=2088)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovusqd))]
pub unsafe fn _mm512_mask_cvtusepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i {
    transmute(vpmovusqd(a.as_u64x8(), src.as_u32x8(), k))
}

/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi64_epi32&expand=2089)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovusqd))]
pub unsafe fn _mm512_maskz_cvtusepi64_epi32(k: __mmask8, a: __m512i) -> __m256i {
    transmute(vpmovusqd(
        a.as_u64x8(),
        _mm256_setzero_si256().as_u32x8(),
        k,
    ))
}

/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi64_epi32&expand=2084)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovusqd))]
pub unsafe fn _mm256_cvtusepi64_epi32(a: __m256i) -> __m128i {
    transmute(vpmovusqd256(
        a.as_u64x4(),
        _mm_setzero_si128().as_u32x4(),
        0b11111111,
    ))
}

/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_epi32&expand=2085)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovusqd))]
pub unsafe fn _mm256_mask_cvtusepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    transmute(vpmovusqd256(a.as_u64x4(), src.as_u32x4(), k))
}

/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi64_epi32&expand=2086)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovusqd))]
pub unsafe fn _mm256_maskz_cvtusepi64_epi32(k: __mmask8, a: __m256i) -> __m128i {
    transmute(vpmovusqd256(
        a.as_u64x4(),
        _mm_setzero_si128().as_u32x4(),
        k,
    ))
}

/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi64_epi32&expand=2081)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovusqd))]
pub unsafe fn _mm_cvtusepi64_epi32(a: __m128i) -> __m128i {
    transmute(vpmovusqd128(
        a.as_u64x2(),
        _mm_setzero_si128().as_u32x4(),
        0b11111111,
    ))
}

/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_epi32&expand=2082)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovusqd))]
pub unsafe fn _mm_mask_cvtusepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    transmute(vpmovusqd128(a.as_u64x2(), src.as_u32x4(), k))
}

/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi64_epi32&expand=2083)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovusqd))]
pub unsafe fn _mm_maskz_cvtusepi64_epi32(k: __mmask8, a: __m128i) -> __m128i {
    transmute(vpmovusqd128(
        a.as_u64x2(),
        _mm_setzero_si128().as_u32x4(),
        k,
    ))
}

/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi64_epi16&expand=2078)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovusqw))]
pub unsafe fn _mm512_cvtusepi64_epi16(a: __m512i) -> __m128i {
    transmute(vpmovusqw(
        a.as_u64x8(),
        _mm_setzero_si128().as_u16x8(),
        0b11111111,
    ))
}

/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_epi16&expand=2079)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovusqw))]
pub unsafe fn _mm512_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
    transmute(vpmovusqw(a.as_u64x8(), src.as_u16x8(), k))
}

/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi64_epi16&expand=2080)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovusqw))]
pub unsafe fn _mm512_maskz_cvtusepi64_epi16(k: __mmask8, a: __m512i) -> __m128i {
    transmute(vpmovusqw(a.as_u64x8(), _mm_setzero_si128().as_u16x8(), k))
}

/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi64_epi16&expand=2075)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovusqw))]
pub unsafe fn _mm256_cvtusepi64_epi16(a: __m256i) -> __m128i {
    transmute(vpmovusqw256(
        a.as_u64x4(),
        _mm_setzero_si128().as_u16x8(),
        0b11111111,
    ))
}

/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_epi16&expand=2076)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovusqw))]
pub unsafe fn _mm256_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    transmute(vpmovusqw256(a.as_u64x4(), src.as_u16x8(), k))
}

/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi64_epi16&expand=2077)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovusqw))]
pub unsafe fn _mm256_maskz_cvtusepi64_epi16(k: __mmask8, a: __m256i) -> __m128i {
    transmute(vpmovusqw256(
        a.as_u64x4(),
        _mm_setzero_si128().as_u16x8(),
        k,
    ))
}

/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi64_epi16&expand=2072)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovusqw))]
pub unsafe fn _mm_cvtusepi64_epi16(a: __m128i) -> __m128i {
    transmute(vpmovusqw128(
        a.as_u64x2(),
        _mm_setzero_si128().as_u16x8(),
        0b11111111,
    ))
}

/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_epi16&expand=2073)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovusqw))]
pub unsafe fn _mm_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    transmute(vpmovusqw128(a.as_u64x2(), src.as_u16x8(), k))
}

/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi64_epi16&expand=2074)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovusqw))]
pub unsafe fn _mm_maskz_cvtusepi64_epi16(k: __mmask8, a: __m128i) -> __m128i {
    transmute(vpmovusqw128(
        a.as_u64x2(),
        _mm_setzero_si128().as_u16x8(),
        k,
    ))
}

/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi64_epi8&expand=2096)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovusqb))]
pub unsafe fn _mm512_cvtusepi64_epi8(a: __m512i) -> __m128i {
    transmute(vpmovusqb(
        a.as_u64x8(),
        _mm_setzero_si128().as_u8x16(),
        0b11111111,
    ))
}

/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_epi8&expand=2097)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovusqb))]
pub unsafe fn _mm512_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
    transmute(vpmovusqb(a.as_u64x8(), src.as_u8x16(), k))
}

/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi64_epi8&expand=2098)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovusqb))]
pub unsafe fn _mm512_maskz_cvtusepi64_epi8(k: __mmask8, a: __m512i) -> __m128i {
    transmute(vpmovusqb(a.as_u64x8(), _mm_setzero_si128().as_u8x16(), k))
}

/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi64_epi8&expand=2093)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovusqb))]
pub unsafe fn _mm256_cvtusepi64_epi8(a: __m256i) -> __m128i {
    transmute(vpmovusqb256(
        a.as_u64x4(),
        _mm_setzero_si128().as_u8x16(),
        0b11111111,
    ))
}

/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_epi8&expand=2094)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovusqb))]
pub unsafe fn _mm256_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    transmute(vpmovusqb256(a.as_u64x4(), src.as_u8x16(), k))
}

/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi64_epi8&expand=2095)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovusqb))]
pub unsafe fn _mm256_maskz_cvtusepi64_epi8(k: __mmask8, a: __m256i) -> __m128i {
    transmute(vpmovusqb256(
        a.as_u64x4(),
        _mm_setzero_si128().as_u8x16(),
        k,
    ))
}

/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi64_epi8&expand=2090)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovusqb))]
pub unsafe fn _mm_cvtusepi64_epi8(a: __m128i) -> __m128i {
    transmute(vpmovusqb128(
        a.as_u64x2(),
        _mm_setzero_si128().as_u8x16(),
        0b11111111,
    ))
}

/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_epi8&expand=2091)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovusqb))]
pub unsafe fn _mm_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    transmute(vpmovusqb128(a.as_u64x2(), src.as_u8x16(), k))
}

/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi64_epi8&expand=2092)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovusqb))]
pub unsafe fn _mm_maskz_cvtusepi64_epi8(k: __mmask8, a: __m128i) -> __m128i {
    transmute(vpmovusqb128(
        a.as_u64x2(),
        _mm_setzero_si128().as_u8x16(),
        k,
    ))
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_epi32&expand=1335)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtps2dq, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn _mm512_cvt_roundps_epi32<const ROUNDING: i32>(a: __m512) -> __m512i {
    static_assert_rounding!(ROUNDING);
    let a: f32x16 = a.as_f32x16();
    let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
    let r: i32x16 = vcvtps2dq(a, zero, 0b11111111_11111111, ROUNDING);
    transmute(r)
}
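
// Usage sketch (hypothetical helper, not from the upstream source): the ROUNDING const
// generic selects the rounding mode at compile time; any other combination is rejected
// by `static_assert_rounding!`.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_cvt_roundps_epi32() -> __m512i {
    // 1.5 rounds to 2 in every lane under round-to-nearest-even.
    let a = _mm512_set1_ps(1.5);
    _mm512_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a)
}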

/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_epi32&expand=1336)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtps2dq, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_mask_cvt_roundps_epi32<const ROUNDING: i32>(
    src: __m512i,
    k: __mmask16,
    a: __m512,
) -> __m512i {
    static_assert_rounding!(ROUNDING);
    let a: f32x16 = a.as_f32x16();
    let src: i32x16 = src.as_i32x16();
    let r: i32x16 = vcvtps2dq(a, src, k, ROUNDING);
    transmute(r)
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_epi32&expand=1337)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtps2dq, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_maskz_cvt_roundps_epi32<const ROUNDING: i32>(
    k: __mmask16,
    a: __m512,
) -> __m512i {
    static_assert_rounding!(ROUNDING);
    let a: f32x16 = a.as_f32x16();
    let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
    let r: i32x16 = vcvtps2dq(a, zero, k, ROUNDING);
    transmute(r)
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_epu32&expand=1341)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtps2udq, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn _mm512_cvt_roundps_epu32<const ROUNDING: i32>(a: __m512) -> __m512i {
    static_assert_rounding!(ROUNDING);
    let a: f32x16 = a.as_f32x16();
    let zero: u32x16 = _mm512_setzero_si512().as_u32x16();
    let r: u32x16 = vcvtps2udq(a, zero, 0b11111111_11111111, ROUNDING);
    transmute(r)
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_epu32&expand=1342)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtps2udq, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_mask_cvt_roundps_epu32<const ROUNDING: i32>(
    src: __m512i,
    k: __mmask16,
    a: __m512,
) -> __m512i {
    static_assert_rounding!(ROUNDING);
    let a: f32x16 = a.as_f32x16();
    let src: u32x16 = src.as_u32x16();
    let r: u32x16 = vcvtps2udq(a, src, k, ROUNDING);
    transmute(r)
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_epu32&expand=1343)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtps2udq, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_maskz_cvt_roundps_epu32<const ROUNDING: i32>(
    k: __mmask16,
    a: __m512,
) -> __m512i {
    static_assert_rounding!(ROUNDING);
    let a: f32x16 = a.as_f32x16();
    let zero: u32x16 = _mm512_setzero_si512().as_u32x16();
    let r: u32x16 = vcvtps2udq(a, zero, k, ROUNDING);
    transmute(r)
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_pd&expand=1347)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtps2pd, SAE = 8))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn _mm512_cvt_roundps_pd<const SAE: i32>(a: __m256) -> __m512d {
    static_assert_sae!(SAE);
    let a: f32x8 = a.as_f32x8();
    let zero: f64x8 = _mm512_setzero_pd().as_f64x8();
    let r: f64x8 = vcvtps2pd(a, zero, 0b11111111, SAE);
    transmute(r)
}
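
// Usage sketch (hypothetical helper): widening f32 to f64 is exact, so the SAE
// parameter only controls exception suppression; `static_assert_sae!` accepts
// _MM_FROUND_NO_EXC or _MM_FROUND_CUR_DIRECTION here.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_cvt_roundps_pd() -> __m512d {
    let a = _mm256_set1_ps(0.25);
    _mm512_cvt_roundps_pd::<_MM_FROUND_NO_EXC>(a)
}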

/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_pd&expand=1348)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtps2pd, SAE = 8))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_mask_cvt_roundps_pd<const SAE: i32>(
    src: __m512d,
    k: __mmask8,
    a: __m256,
) -> __m512d {
    static_assert_sae!(SAE);
    let a: f32x8 = a.as_f32x8();
    let src: f64x8 = src.as_f64x8();
    let r: f64x8 = vcvtps2pd(a, src, k, SAE);
    transmute(r)
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_pd&expand=1349)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtps2pd, SAE = 8))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_maskz_cvt_roundps_pd<const SAE: i32>(k: __mmask8, a: __m256) -> __m512d {
    static_assert_sae!(SAE);
    let a: f32x8 = a.as_f32x8();
    let zero: f64x8 = _mm512_setzero_pd().as_f64x8();
    let r: f64x8 = vcvtps2pd(a, zero, k, SAE);
    transmute(r)
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_epi32&expand=1315)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtpd2dq, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn _mm512_cvt_roundpd_epi32<const ROUNDING: i32>(a: __m512d) -> __m256i {
    static_assert_rounding!(ROUNDING);
    let a: f64x8 = a.as_f64x8();
    let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
    let r: i32x8 = vcvtpd2dq(a, zero, 0b11111111, ROUNDING);
    transmute(r)
}
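
// Usage sketch (hypothetical helper, not part of the upstream source): with
// _MM_FROUND_TO_ZERO the conversion truncates, so 2.9 becomes 2 in every 32-bit lane
// of the 256-bit result.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_cvt_roundpd_epi32() -> __m256i {
    let a = _mm512_set1_pd(2.9);
    _mm512_cvt_roundpd_epi32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a)
}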
14459
14460/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14461///
14462/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14463/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
14464/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
14465/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
14466/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
14467/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
14468///
14469/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_epi32&expand=1316)
14470#[inline]
14471#[target_feature(enable = "avx512f")]
14472#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14473#[cfg_attr(test, assert_instr(vcvtpd2dq, ROUNDING = 8))]
14474#[rustc_legacy_const_generics(3)]
14475pub unsafe fn _mm512_mask_cvt_roundpd_epi32<const ROUNDING: i32>(
14476 src: __m256i,
14477 k: __mmask8,
14478 a: __m512d,
14479) -> __m256i {
14480 static_assert_rounding!(ROUNDING);
14481 let a: f64x8 = a.as_f64x8();
14482 let src: i32x8 = src.as_i32x8();
14483 let r: i32x8 = vcvtpd2dq(a, src, mask:k, ROUNDING);
14484 transmute(src:r)
14485}
14486
14487/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14488///
14489/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14490/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
14491/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
14492/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
14493/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
14494/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
14495///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundpd_epi32&expand=1317)
14497#[inline]
14498#[target_feature(enable = "avx512f")]
14499#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14500#[cfg_attr(test, assert_instr(vcvtpd2dq, ROUNDING = 8))]
14501#[rustc_legacy_const_generics(2)]
14502pub unsafe fn _mm512_maskz_cvt_roundpd_epi32<const ROUNDING: i32>(
14503 k: __mmask8,
14504 a: __m512d,
14505) -> __m256i {
14506 static_assert_rounding!(ROUNDING);
14507 let a: f64x8 = a.as_f64x8();
14508 let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
14509 let r: i32x8 = vcvtpd2dq(a, src:zero, mask:k, ROUNDING);
14510 transmute(src:r)
14511}
14512
14513/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.\
14514///
14515/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14516/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
14517/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
14518/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
14519/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
14520/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
14521///
14522/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_epu32&expand=1321)
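///
/// # Example
///
/// A minimal sketch (not part of the original documentation), assuming AVX-512F and
/// the unstable `stdarch_x86_avx512` feature; `to_u32_nearest` is an illustrative name.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn to_u32_nearest(a: __m512d) -> __m256i {
///     // The eight unsigned 32-bit results are packed into a 256-bit integer vector.
///     _mm512_cvt_roundpd_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a)
/// }
/// ```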
14523#[inline]
14524#[target_feature(enable = "avx512f")]
14525#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14526#[cfg_attr(test, assert_instr(vcvtpd2udq, ROUNDING = 8))]
14527#[rustc_legacy_const_generics(1)]
14528pub unsafe fn _mm512_cvt_roundpd_epu32<const ROUNDING: i32>(a: __m512d) -> __m256i {
14529 static_assert_rounding!(ROUNDING);
14530 let a: f64x8 = a.as_f64x8();
14531 let zero: u32x8 = _mm256_setzero_si256().as_u32x8();
14532 let r: u32x8 = vcvtpd2udq(a, src:zero, mask:0b11111111, ROUNDING);
14533 transmute(src:r)
14534}
14535
14536/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14537///
14538/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14539/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
14540/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
14541/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
14542/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
14543/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
14544///
14545/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_epu32&expand=1322)
14546#[inline]
14547#[target_feature(enable = "avx512f")]
14548#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14549#[cfg_attr(test, assert_instr(vcvtpd2udq, ROUNDING = 8))]
14550#[rustc_legacy_const_generics(3)]
14551pub unsafe fn _mm512_mask_cvt_roundpd_epu32<const ROUNDING: i32>(
14552 src: __m256i,
14553 k: __mmask8,
14554 a: __m512d,
14555) -> __m256i {
14556 static_assert_rounding!(ROUNDING);
14557 let a: f64x8 = a.as_f64x8();
14558 let src: u32x8 = src.as_u32x8();
14559 let r: u32x8 = vcvtpd2udq(a, src, mask:k, ROUNDING);
14560 transmute(src:r)
14561}
14562
14563/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14564///
14565/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14566/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
14567/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
14568/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
14569/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
14570/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
14571///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundpd_epu32&expand=1323)
14573#[inline]
14574#[target_feature(enable = "avx512f")]
14575#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14576#[cfg_attr(test, assert_instr(vcvtpd2udq, ROUNDING = 8))]
14577#[rustc_legacy_const_generics(2)]
14578pub unsafe fn _mm512_maskz_cvt_roundpd_epu32<const ROUNDING: i32>(
14579 k: __mmask8,
14580 a: __m512d,
14581) -> __m256i {
14582 static_assert_rounding!(ROUNDING);
14583 let a: f64x8 = a.as_f64x8();
14584 let zero: u32x8 = _mm256_setzero_si256().as_u32x8();
14585 let r: u32x8 = vcvtpd2udq(a, src:zero, mask:k, ROUNDING);
14586 transmute(src:r)
14587}
14588
14589/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
14590///
14591/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14592/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
14593/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
14594/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
14595/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
14596/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
14597///
14598/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_ps&expand=1327)
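///
/// # Example
///
/// A minimal sketch (not part of the original documentation), assuming AVX-512F and
/// the unstable `stdarch_x86_avx512` feature; `narrow_to_f32` is an illustrative name.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn narrow_to_f32(a: __m512d) -> __m256 {
///     // Narrow eight f64 lanes to f32 using whatever rounding mode MXCSR currently selects.
///     _mm512_cvt_roundpd_ps::<{ _MM_FROUND_CUR_DIRECTION }>(a)
/// }
/// ```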
14599#[inline]
14600#[target_feature(enable = "avx512f")]
14601#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14602#[cfg_attr(test, assert_instr(vcvtpd2ps, ROUNDING = 8))]
14603#[rustc_legacy_const_generics(1)]
14604pub unsafe fn _mm512_cvt_roundpd_ps<const ROUNDING: i32>(a: __m512d) -> __m256 {
14605 static_assert_rounding!(ROUNDING);
14606 let a: f64x8 = a.as_f64x8();
14607 let zero: f32x8 = _mm256_setzero_ps().as_f32x8();
14608 let r: f32x8 = vcvtpd2ps(a, src:zero, mask:0b11111111, ROUNDING);
14609 transmute(src:r)
14610}
14611
14612/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14613///
14614/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14615/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
14616/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
14617/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
14618/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
14619/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
14620///
14621/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_ps&expand=1328)
14622#[inline]
14623#[target_feature(enable = "avx512f")]
14624#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14625#[cfg_attr(test, assert_instr(vcvtpd2ps, ROUNDING = 8))]
14626#[rustc_legacy_const_generics(3)]
14627pub unsafe fn _mm512_mask_cvt_roundpd_ps<const ROUNDING: i32>(
14628 src: __m256,
14629 k: __mmask8,
14630 a: __m512d,
14631) -> __m256 {
14632 static_assert_rounding!(ROUNDING);
14633 let a: f64x8 = a.as_f64x8();
14634 let src: f32x8 = src.as_f32x8();
14635 let r: f32x8 = vcvtpd2ps(a, src, mask:k, ROUNDING);
14636 transmute(src:r)
14637}
14638
14639/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14640///
14641/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14642/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
14643/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
14644/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
14645/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
14646/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
14647///
14648/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundpd_ps&expand=1329)
14649#[inline]
14650#[target_feature(enable = "avx512f")]
14651#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14652#[cfg_attr(test, assert_instr(vcvtpd2ps, ROUNDING = 8))]
14653#[rustc_legacy_const_generics(2)]
14654pub unsafe fn _mm512_maskz_cvt_roundpd_ps<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m256 {
14655 static_assert_rounding!(ROUNDING);
14656 let a: f64x8 = a.as_f64x8();
14657 let zero: f32x8 = _mm256_setzero_ps().as_f32x8();
14658 let r: f32x8 = vcvtpd2ps(a, src:zero, mask:k, ROUNDING);
14659 transmute(src:r)
14660}
14661
14662/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
14663///
14664/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14665/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
14666/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
14667/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
14668/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
14669/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
14670///
14671/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepi32_ps&expand=1294)
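///
/// # Example
///
/// A minimal sketch (not part of the original documentation), assuming AVX-512F and
/// the unstable `stdarch_x86_avx512` feature; `ints_to_f32` is an illustrative name.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn ints_to_f32(a: __m512i) -> __m512 {
///     // Integers above 2^24 in magnitude are not exactly representable as f32,
///     // so the rounding mode chosen here (toward zero) is observable for them.
///     _mm512_cvt_roundepi32_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a)
/// }
/// ```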
14672#[inline]
14673#[target_feature(enable = "avx512f")]
14674#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14675#[cfg_attr(test, assert_instr(vcvtdq2ps, ROUNDING = 8))]
14676#[rustc_legacy_const_generics(1)]
14677pub unsafe fn _mm512_cvt_roundepi32_ps<const ROUNDING: i32>(a: __m512i) -> __m512 {
14678 static_assert_rounding!(ROUNDING);
14679 let a: i32x16 = a.as_i32x16();
14680 let r: f32x16 = vcvtdq2ps(a, ROUNDING);
14681 transmute(src:r)
14682}
14683
14684/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14685///
14686/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14687/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
14688/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
14689/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
14690/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
14691/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
14692///
14693/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepi32_ps&expand=1295)
14694#[inline]
14695#[target_feature(enable = "avx512f")]
14696#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14697#[cfg_attr(test, assert_instr(vcvtdq2ps, ROUNDING = 8))]
14698#[rustc_legacy_const_generics(3)]
14699pub unsafe fn _mm512_mask_cvt_roundepi32_ps<const ROUNDING: i32>(
14700 src: __m512,
14701 k: __mmask16,
14702 a: __m512i,
14703) -> __m512 {
14704 static_assert_rounding!(ROUNDING);
14705 let a: i32x16 = a.as_i32x16();
14706 let r: f32x16 = vcvtdq2ps(a, ROUNDING);
14707 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_f32x16()))
14708}
14709
14710/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14711///
14712/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14713/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
14714/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
14715/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
14716/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
14717/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
14718///
14719/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepi32_ps&expand=1296)
14720#[inline]
14721#[target_feature(enable = "avx512f")]
14722#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14723#[cfg_attr(test, assert_instr(vcvtdq2ps, ROUNDING = 8))]
14724#[rustc_legacy_const_generics(2)]
14725pub unsafe fn _mm512_maskz_cvt_roundepi32_ps<const ROUNDING: i32>(
14726 k: __mmask16,
14727 a: __m512i,
14728) -> __m512 {
14729 static_assert_rounding!(ROUNDING);
14730 let a: i32x16 = a.as_i32x16();
14731 let r: f32x16 = vcvtdq2ps(a, ROUNDING);
14732 let zero: f32x16 = _mm512_setzero_ps().as_f32x16();
14733 transmute(src:simd_select_bitmask(m:k, yes:r, no:zero))
14734}
14735
14736/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
14737///
14738/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14739/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
14740/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
14741/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
14742/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
14743/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
14744///
14745/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepu32_ps&expand=1303)
14746#[inline]
14747#[target_feature(enable = "avx512f")]
14748#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14749#[cfg_attr(test, assert_instr(vcvtudq2ps, ROUNDING = 8))]
14750#[rustc_legacy_const_generics(1)]
14751pub unsafe fn _mm512_cvt_roundepu32_ps<const ROUNDING: i32>(a: __m512i) -> __m512 {
14752 static_assert_rounding!(ROUNDING);
14753 let a: u32x16 = a.as_u32x16();
14754 let r: f32x16 = vcvtudq2ps(a, ROUNDING);
14755 transmute(src:r)
14756}
14757
14758/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14759///
14760/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14761/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
14762/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
14763/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
14764/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
14765/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
14766///
14767/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepu32_ps&expand=1304)
14768#[inline]
14769#[target_feature(enable = "avx512f")]
14770#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14771#[cfg_attr(test, assert_instr(vcvtudq2ps, ROUNDING = 8))]
14772#[rustc_legacy_const_generics(3)]
14773pub unsafe fn _mm512_mask_cvt_roundepu32_ps<const ROUNDING: i32>(
14774 src: __m512,
14775 k: __mmask16,
14776 a: __m512i,
14777) -> __m512 {
14778 static_assert_rounding!(ROUNDING);
14779 let a: u32x16 = a.as_u32x16();
14780 let r: f32x16 = vcvtudq2ps(a, ROUNDING);
14781 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_f32x16()))
14782}
14783
14784/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14785///
14786/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14787/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
14788/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
14789/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
14790/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
14791/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
14792///
14793/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepu32_ps&expand=1305)
14794#[inline]
14795#[target_feature(enable = "avx512f")]
14796#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14797#[cfg_attr(test, assert_instr(vcvtudq2ps, ROUNDING = 8))]
14798#[rustc_legacy_const_generics(2)]
14799pub unsafe fn _mm512_maskz_cvt_roundepu32_ps<const ROUNDING: i32>(
14800 k: __mmask16,
14801 a: __m512i,
14802) -> __m512 {
14803 static_assert_rounding!(ROUNDING);
14804 let a: u32x16 = a.as_u32x16();
14805 let r: f32x16 = vcvtudq2ps(a, ROUNDING);
14806 let zero: f32x16 = _mm512_setzero_ps().as_f32x16();
14807 transmute(src:simd_select_bitmask(m:k, yes:r, no:zero))
14808}
14809
14810/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst.\
14811/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
14812///
14813/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_ph&expand=1354)
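///
/// # Example
///
/// A minimal sketch (not part of the original documentation), assuming AVX-512F and
/// the unstable `stdarch_x86_avx512` feature; `to_f16_bits` is an illustrative name.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn to_f16_bits(a: __m512) -> __m256i {
///     // The sixteen half-precision results are returned as raw 16-bit lanes
///     // inside a 256-bit integer vector.
///     _mm512_cvt_roundps_ph::<{ _MM_FROUND_NO_EXC }>(a)
/// }
/// ```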
14814#[inline]
14815#[target_feature(enable = "avx512f")]
14816#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14817#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))]
14818#[rustc_legacy_const_generics(1)]
14819pub unsafe fn _mm512_cvt_roundps_ph<const SAE: i32>(a: __m512) -> __m256i {
14820 static_assert_sae!(SAE);
14821 let a: f32x16 = a.as_f32x16();
14822 let zero: i16x16 = _mm256_setzero_si256().as_i16x16();
14823 let r: i16x16 = vcvtps2ph(a, SAE, src:zero, mask:0b11111111_11111111);
14824 transmute(src:r)
14825}
14826
14827/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14828/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
14829///
14830/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_ph&expand=1355)
14831#[inline]
14832#[target_feature(enable = "avx512f")]
14833#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14834#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))]
14835#[rustc_legacy_const_generics(3)]
14836pub unsafe fn _mm512_mask_cvt_roundps_ph<const SAE: i32>(
14837 src: __m256i,
14838 k: __mmask16,
14839 a: __m512,
14840) -> __m256i {
14841 static_assert_sae!(SAE);
14842 let a: f32x16 = a.as_f32x16();
14843 let src: i16x16 = src.as_i16x16();
14844 let r: i16x16 = vcvtps2ph(a, SAE, src, mask:k);
14845 transmute(src:r)
14846}
14847
14848/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14849/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
14850///
14851/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_ph&expand=1356)
14852#[inline]
14853#[target_feature(enable = "avx512f")]
14854#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14855#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))]
14856#[rustc_legacy_const_generics(2)]
14857pub unsafe fn _mm512_maskz_cvt_roundps_ph<const SAE: i32>(k: __mmask16, a: __m512) -> __m256i {
14858 static_assert_sae!(SAE);
14859 let a: f32x16 = a.as_f32x16();
14860 let zero: i16x16 = _mm256_setzero_si256().as_i16x16();
14861 let r: i16x16 = vcvtps2ph(a, SAE, src:zero, mask:k);
14862 transmute(src:r)
14863}
14864
14865/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
14867/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
14868/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
14869/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
14870/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
14871/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
14872///
14873/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvt_roundps_ph&expand=1352)
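///
/// # Example
///
/// A minimal sketch (not part of the original documentation), assuming AVX-512F plus
/// AVX-512VL and the unstable `stdarch_x86_avx512` feature; `masked_to_f16` is an
/// illustrative name.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f,avx512vl")]
/// unsafe fn masked_to_f16(src: __m128i, a: __m256) -> __m128i {
///     // Convert only the upper four lanes; the lower four keep the f16 bits from `src`.
///     _mm256_mask_cvt_roundps_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(src, 0b1111_0000, a)
/// }
/// ```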
14874#[inline]
14875#[target_feature(enable = "avx512f,avx512vl")]
14876#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14877#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
14878#[rustc_legacy_const_generics(3)]
14879pub unsafe fn _mm256_mask_cvt_roundps_ph<const IMM8: i32>(
14880 src: __m128i,
14881 k: __mmask8,
14882 a: __m256,
14883) -> __m128i {
14884 static_assert_uimm_bits!(IMM8, 8);
14885 let a: f32x8 = a.as_f32x8();
14886 let src: i16x8 = src.as_i16x8();
14887 let r: i16x8 = vcvtps2ph256(a, IMM8, src, mask:k);
14888 transmute(src:r)
14889}
14890
14891/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14892/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
14893/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
14894/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
14895/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
14896/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
14897/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
14898///
14899/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvt_roundps_ph&expand=1353)
14900#[inline]
14901#[target_feature(enable = "avx512f,avx512vl")]
14902#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14903#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
14904#[rustc_legacy_const_generics(2)]
14905pub unsafe fn _mm256_maskz_cvt_roundps_ph<const IMM8: i32>(k: __mmask8, a: __m256) -> __m128i {
14906 static_assert_uimm_bits!(IMM8, 8);
14907 let a: f32x8 = a.as_f32x8();
14908 let zero: i16x8 = _mm_setzero_si128().as_i16x8();
14909 let r: i16x8 = vcvtps2ph256(a, IMM8, src:zero, mask:k);
14910 transmute(src:r)
14911}
14912
14913/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14914/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
14915/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
14916/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
14917/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
14918/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
14919/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
14920///
14921/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvt_roundps_ph&expand=1350)
14922#[inline]
14923#[target_feature(enable = "avx512f,avx512vl")]
14924#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14925#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
14926#[rustc_legacy_const_generics(3)]
14927pub unsafe fn _mm_mask_cvt_roundps_ph<const IMM8: i32>(
14928 src: __m128i,
14929 k: __mmask8,
14930 a: __m128,
14931) -> __m128i {
14932 static_assert_uimm_bits!(IMM8, 8);
14933 let a: f32x4 = a.as_f32x4();
14934 let src: i16x8 = src.as_i16x8();
14935 let r: i16x8 = vcvtps2ph128(a, IMM8, src, mask:k);
14936 transmute(src:r)
14937}
14938
14939/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14940/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
14941/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
14942/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
14943/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
14944/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
14945/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
14946///
14947/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvt_roundps_ph&expand=1351)
14948#[inline]
14949#[target_feature(enable = "avx512f,avx512vl")]
14950#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14951#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
14952#[rustc_legacy_const_generics(2)]
14953pub unsafe fn _mm_maskz_cvt_roundps_ph<const IMM8: i32>(k: __mmask8, a: __m128) -> __m128i {
14954 static_assert_uimm_bits!(IMM8, 8);
14955 let a: f32x4 = a.as_f32x4();
14956 let zero: i16x8 = _mm_setzero_si128().as_i16x8();
14957 let r: i16x8 = vcvtps2ph128(a, IMM8, src:zero, mask:k);
14958 transmute(src:r)
14959}
14960
14961/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst.\
14962/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
14963///
14964/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_ph&expand=1778)
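///
/// # Example
///
/// A minimal sketch (not part of the original documentation), assuming AVX-512F and
/// the unstable `stdarch_x86_avx512` feature; `f16_round_trip` is an illustrative name.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn f16_round_trip(a: __m512) -> __m512 {
///     // Compress to f16 storage and expand back; values exactly representable
///     // in half precision survive the round trip unchanged.
///     let half = _mm512_cvtps_ph::<{ _MM_FROUND_NO_EXC }>(a);
///     _mm512_cvtph_ps(half)
/// }
/// ```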
14965#[inline]
14966#[target_feature(enable = "avx512f")]
14967#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14968#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))]
14969#[rustc_legacy_const_generics(1)]
14970pub unsafe fn _mm512_cvtps_ph<const SAE: i32>(a: __m512) -> __m256i {
14971 static_assert_sae!(SAE);
14972 let a: f32x16 = a.as_f32x16();
14973 let zero: i16x16 = _mm256_setzero_si256().as_i16x16();
14974 let r: i16x16 = vcvtps2ph(a, SAE, src:zero, mask:0b11111111_11111111);
14975 transmute(src:r)
14976}
14977
14978/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14979/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
14980///
14981/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_ph&expand=1779)
14982#[inline]
14983#[target_feature(enable = "avx512f")]
14984#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14985#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))]
14986#[rustc_legacy_const_generics(3)]
14987pub unsafe fn _mm512_mask_cvtps_ph<const SAE: i32>(
14988 src: __m256i,
14989 k: __mmask16,
14990 a: __m512,
14991) -> __m256i {
14992 static_assert_sae!(SAE);
14993 let a: f32x16 = a.as_f32x16();
14994 let src: i16x16 = src.as_i16x16();
14995 let r: i16x16 = vcvtps2ph(a, SAE, src, mask:k);
14996 transmute(src:r)
14997}
14998
14999/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15000/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15001///
15002/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_ph&expand=1780)
15003#[inline]
15004#[target_feature(enable = "avx512f")]
15005#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15006#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))]
15007#[rustc_legacy_const_generics(2)]
15008pub unsafe fn _mm512_maskz_cvtps_ph<const SAE: i32>(k: __mmask16, a: __m512) -> __m256i {
15009 static_assert_sae!(SAE);
15010 let a: f32x16 = a.as_f32x16();
15011 let zero: i16x16 = _mm256_setzero_si256().as_i16x16();
15012 let r: i16x16 = vcvtps2ph(a, SAE, src:zero, mask:k);
15013 transmute(src:r)
15014}
15015
15016/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15017/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15018/// _MM_FROUND_TO_NEAREST_INT // round to nearest\
15019/// _MM_FROUND_TO_NEG_INF // round down\
15020/// _MM_FROUND_TO_POS_INF // round up\
15021/// _MM_FROUND_TO_ZERO // truncate\
15022/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
15023///
15024/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_ph&expand=1776)
15025#[inline]
15026#[target_feature(enable = "avx512f,avx512vl")]
15027#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15028#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15029#[rustc_legacy_const_generics(3)]
15030pub unsafe fn _mm256_mask_cvtps_ph<const IMM8: i32>(
15031 src: __m128i,
15032 k: __mmask8,
15033 a: __m256,
15034) -> __m128i {
15035 static_assert_uimm_bits!(IMM8, 8);
15036 let a: f32x8 = a.as_f32x8();
15037 let src: i16x8 = src.as_i16x8();
15038 let r: i16x8 = vcvtps2ph256(a, IMM8, src, mask:k);
15039 transmute(src:r)
15040}
15041
15042/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15043/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15044/// _MM_FROUND_TO_NEAREST_INT // round to nearest\
15045/// _MM_FROUND_TO_NEG_INF // round down\
15046/// _MM_FROUND_TO_POS_INF // round up\
15047/// _MM_FROUND_TO_ZERO // truncate\
15048/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
15049///
15050/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_ph&expand=1777)
15051#[inline]
15052#[target_feature(enable = "avx512f,avx512vl")]
15053#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15054#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15055#[rustc_legacy_const_generics(2)]
15056pub unsafe fn _mm256_maskz_cvtps_ph<const IMM8: i32>(k: __mmask8, a: __m256) -> __m128i {
15057 static_assert_uimm_bits!(IMM8, 8);
15058 let a: f32x8 = a.as_f32x8();
15059 let zero: i16x8 = _mm_setzero_si128().as_i16x8();
15060 let r: i16x8 = vcvtps2ph256(a, IMM8, src:zero, mask:k);
15061 transmute(src:r)
15062}
15063
15064/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15065/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15066/// _MM_FROUND_TO_NEAREST_INT // round to nearest\
15067/// _MM_FROUND_TO_NEG_INF // round down\
15068/// _MM_FROUND_TO_POS_INF // round up\
15069/// _MM_FROUND_TO_ZERO // truncate\
15070/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
15071///
15072/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_ph&expand=1773)
15073#[inline]
15074#[target_feature(enable = "avx512f,avx512vl")]
15075#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15076#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15077#[rustc_legacy_const_generics(3)]
15078pub unsafe fn _mm_mask_cvtps_ph<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
15079 static_assert_uimm_bits!(IMM8, 8);
15080 let a: f32x4 = a.as_f32x4();
15081 let src: i16x8 = src.as_i16x8();
15082 let r: i16x8 = vcvtps2ph128(a, IMM8, src, mask:k);
15083 transmute(src:r)
15084}
15085
15086/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15087/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15088/// _MM_FROUND_TO_NEAREST_INT // round to nearest\
15089/// _MM_FROUND_TO_NEG_INF // round down\
15090/// _MM_FROUND_TO_POS_INF // round up\
15091/// _MM_FROUND_TO_ZERO // truncate\
15092/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
15093///
15094/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_ph&expand=1774)
15095#[inline]
15096#[target_feature(enable = "avx512f,avx512vl")]
15097#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15098#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15099#[rustc_legacy_const_generics(2)]
15100pub unsafe fn _mm_maskz_cvtps_ph<const IMM8: i32>(k: __mmask8, a: __m128) -> __m128i {
15101 static_assert_uimm_bits!(IMM8, 8);
15102 let a: f32x4 = a.as_f32x4();
15103 let zero: i16x8 = _mm_setzero_si128().as_i16x8();
15104 let r: i16x8 = vcvtps2ph128(a, IMM8, src:zero, mask:k);
15105 transmute(src:r)
15106}
15107
15108/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
15109/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15110///
15111/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundph_ps&expand=1332)
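///
/// # Example
///
/// A minimal sketch (not part of the original documentation), assuming AVX-512F and
/// the unstable `stdarch_x86_avx512` feature; `halves_to_f32` is an illustrative name.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn halves_to_f32(a: __m256i) -> __m512 {
///     // `a` is interpreted as sixteen packed f16 values.
///     _mm512_cvt_roundph_ps::<{ _MM_FROUND_NO_EXC }>(a)
/// }
/// ```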
15112#[inline]
15113#[target_feature(enable = "avx512f")]
15114#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15115#[cfg_attr(test, assert_instr(vcvtph2ps, SAE = 8))]
15116#[rustc_legacy_const_generics(1)]
15117pub unsafe fn _mm512_cvt_roundph_ps<const SAE: i32>(a: __m256i) -> __m512 {
15118 static_assert_sae!(SAE);
15119 let a: i16x16 = a.as_i16x16();
15120 let zero: f32x16 = _mm512_setzero_ps().as_f32x16();
15121 let r: f32x16 = vcvtph2ps(a, src:zero, mask:0b11111111_11111111, SAE);
15122 transmute(src:r)
15123}
15124
15125/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15126/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15127///
15128/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundph_ps&expand=1333)
15129#[inline]
15130#[target_feature(enable = "avx512f")]
15131#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15132#[cfg_attr(test, assert_instr(vcvtph2ps, SAE = 8))]
15133#[rustc_legacy_const_generics(3)]
15134pub unsafe fn _mm512_mask_cvt_roundph_ps<const SAE: i32>(
15135 src: __m512,
15136 k: __mmask16,
15137 a: __m256i,
15138) -> __m512 {
15139 static_assert_sae!(SAE);
15140 let a: i16x16 = a.as_i16x16();
15141 let src: f32x16 = src.as_f32x16();
15142 let r: f32x16 = vcvtph2ps(a, src, mask:k, SAE);
15143 transmute(src:r)
15144}
15145
15146/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15147/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15148///
15149/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundph_ps&expand=1334)
15150#[inline]
15151#[target_feature(enable = "avx512f")]
15152#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15153#[cfg_attr(test, assert_instr(vcvtph2ps, SAE = 8))]
15154#[rustc_legacy_const_generics(2)]
15155pub unsafe fn _mm512_maskz_cvt_roundph_ps<const SAE: i32>(k: __mmask16, a: __m256i) -> __m512 {
15156 static_assert_sae!(SAE);
15157 let a: i16x16 = a.as_i16x16();
15158 let zero: f32x16 = _mm512_setzero_ps().as_f32x16();
15159 let r: f32x16 = vcvtph2ps(a, src:zero, mask:k, SAE);
15160 transmute(src:r)
15161}
15162
15163/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
15164///
15165/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtph_ps&expand=1723)
15166#[inline]
15167#[target_feature(enable = "avx512f")]
15168#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15169#[cfg_attr(test, assert_instr(vcvtph2ps))]
15170pub unsafe fn _mm512_cvtph_ps(a: __m256i) -> __m512 {
15171 transmute(src:vcvtph2ps(
15172 a:a.as_i16x16(),
15173 src:_mm512_setzero_ps().as_f32x16(),
15174 mask:0b11111111_11111111,
15175 _MM_FROUND_NO_EXC,
15176 ))
15177}
15178
15179/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15180///
15181/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtph_ps&expand=1724)
15182#[inline]
15183#[target_feature(enable = "avx512f")]
15184#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15185#[cfg_attr(test, assert_instr(vcvtph2ps))]
15186pub unsafe fn _mm512_mask_cvtph_ps(src: __m512, k: __mmask16, a: __m256i) -> __m512 {
15187 transmute(src:vcvtph2ps(
15188 a:a.as_i16x16(),
15189 src:src.as_f32x16(),
15190 mask:k,
15191 _MM_FROUND_NO_EXC,
15192 ))
15193}
15194
15195/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15196///
15197/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtph_ps&expand=1725)
15198#[inline]
15199#[target_feature(enable = "avx512f")]
15200#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15201#[cfg_attr(test, assert_instr(vcvtph2ps))]
15202pub unsafe fn _mm512_maskz_cvtph_ps(k: __mmask16, a: __m256i) -> __m512 {
15203 transmute(src:vcvtph2ps(
15204 a:a.as_i16x16(),
15205 src:_mm512_setzero_ps().as_f32x16(),
15206 mask:k,
15207 _MM_FROUND_NO_EXC,
15208 ))
15209}
15210
15211/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15212///
15213/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtph_ps&expand=1721)
15214#[inline]
15215#[target_feature(enable = "avx512f,avx512vl")]
15216#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15217#[cfg_attr(test, assert_instr(vcvtph2ps))]
15218pub unsafe fn _mm256_mask_cvtph_ps(src: __m256, k: __mmask8, a: __m128i) -> __m256 {
15219 let convert: __m256 = _mm256_cvtph_ps(a);
15220 transmute(src:simd_select_bitmask(m:k, yes:convert.as_f32x8(), no:src.as_f32x8()))
15221}
15222
15223/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15224///
15225/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtph_ps&expand=1722)
15226#[inline]
15227#[target_feature(enable = "avx512f,avx512vl")]
15228#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15229#[cfg_attr(test, assert_instr(vcvtph2ps))]
15230pub unsafe fn _mm256_maskz_cvtph_ps(k: __mmask8, a: __m128i) -> __m256 {
15231 let convert: __m256 = _mm256_cvtph_ps(a);
15232 let zero: f32x8 = _mm256_setzero_ps().as_f32x8();
15233 transmute(src:simd_select_bitmask(m:k, yes:convert.as_f32x8(), no:zero))
15234}
15235
15236/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15237///
15238/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtph_ps&expand=1718)
15239#[inline]
15240#[target_feature(enable = "avx512f,avx512vl")]
15241#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15242#[cfg_attr(test, assert_instr(vcvtph2ps))]
15243pub unsafe fn _mm_mask_cvtph_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 {
15244 let convert: __m128 = _mm_cvtph_ps(a);
15245 transmute(src:simd_select_bitmask(m:k, yes:convert.as_f32x4(), no:src.as_f32x4()))
15246}
15247
15248/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15249///
15250/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtph_ps&expand=1719)
15251#[inline]
15252#[target_feature(enable = "avx512f,avx512vl")]
15253#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15254#[cfg_attr(test, assert_instr(vcvtph2ps))]
15255pub unsafe fn _mm_maskz_cvtph_ps(k: __mmask8, a: __m128i) -> __m128 {
15256 let convert: __m128 = _mm_cvtph_ps(a);
15257 let zero: f32x4 = _mm_setzero_ps().as_f32x4();
15258 transmute(src:simd_select_bitmask(m:k, yes:convert.as_f32x4(), no:zero))
15259}
15260
15261/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.\
15262/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15263///
15264/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundps_epi32&expand=1916)
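///
/// # Example
///
/// A minimal sketch (not part of the original documentation), assuming AVX-512F and
/// the unstable `stdarch_x86_avx512` feature; `truncate_to_i32` is an illustrative name.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn truncate_to_i32(a: __m512) -> __m512i {
///     // The `tt` form always truncates toward zero; the immediate only chooses
///     // whether floating-point exceptions are suppressed.
///     _mm512_cvtt_roundps_epi32::<{ _MM_FROUND_NO_EXC }>(a)
/// }
/// ```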
15265#[inline]
15266#[target_feature(enable = "avx512f")]
15267#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15268#[cfg_attr(test, assert_instr(vcvttps2dq, SAE = 8))]
15269#[rustc_legacy_const_generics(1)]
15270pub unsafe fn _mm512_cvtt_roundps_epi32<const SAE: i32>(a: __m512) -> __m512i {
15271 static_assert_sae!(SAE);
15272 let a: f32x16 = a.as_f32x16();
15273 let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
15274 let r: i32x16 = vcvttps2dq(a, src:zero, mask:0b11111111_11111111, SAE);
15275 transmute(src:r)
15276}
15277
15278/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15279/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15280///
15281/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundps_epi32&expand=1917)
15282#[inline]
15283#[target_feature(enable = "avx512f")]
15284#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15285#[cfg_attr(test, assert_instr(vcvttps2dq, SAE = 8))]
15286#[rustc_legacy_const_generics(3)]
15287pub unsafe fn _mm512_mask_cvtt_roundps_epi32<const SAE: i32>(
15288 src: __m512i,
15289 k: __mmask16,
15290 a: __m512,
15291) -> __m512i {
15292 static_assert_sae!(SAE);
15293 let a: f32x16 = a.as_f32x16();
15294 let src: i32x16 = src.as_i32x16();
15295 let r: i32x16 = vcvttps2dq(a, src, mask:k, SAE);
15296 transmute(src:r)
15297}
15298
15299/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15300/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15301///
15302/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundps_epi32&expand=1918)
15303#[inline]
15304#[target_feature(enable = "avx512f")]
15305#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15306#[cfg_attr(test, assert_instr(vcvttps2dq, SAE = 8))]
15307#[rustc_legacy_const_generics(2)]
15308pub unsafe fn _mm512_maskz_cvtt_roundps_epi32<const SAE: i32>(k: __mmask16, a: __m512) -> __m512i {
15309 static_assert_sae!(SAE);
15310 let a: f32x16 = a.as_f32x16();
15311 let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
15312 let r: i32x16 = vcvttps2dq(a, src:zero, mask:k, SAE);
15313 transmute(src:r)
15314}
15315
15316/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.\
15317/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15318///
15319/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundps_epu32&expand=1922)
15320#[inline]
15321#[target_feature(enable = "avx512f")]
15322#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15323#[cfg_attr(test, assert_instr(vcvttps2udq, SAE = 8))]
15324#[rustc_legacy_const_generics(1)]
15325pub unsafe fn _mm512_cvtt_roundps_epu32<const SAE: i32>(a: __m512) -> __m512i {
15326 static_assert_sae!(SAE);
15327 let a: f32x16 = a.as_f32x16();
15328 let zero: u32x16 = _mm512_setzero_si512().as_u32x16();
15329 let r: u32x16 = vcvttps2udq(a, src:zero, mask:0b11111111_11111111, SAE);
15330 transmute(src:r)
15331}
15332
15333/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15334/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15335///
15336/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundps_epu32&expand=1923)
15337#[inline]
15338#[target_feature(enable = "avx512f")]
15339#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15340#[cfg_attr(test, assert_instr(vcvttps2udq, SAE = 8))]
15341#[rustc_legacy_const_generics(3)]
15342pub unsafe fn _mm512_mask_cvtt_roundps_epu32<const SAE: i32>(
15343 src: __m512i,
15344 k: __mmask16,
15345 a: __m512,
15346) -> __m512i {
15347 static_assert_sae!(SAE);
15348 let a: f32x16 = a.as_f32x16();
15349 let src: u32x16 = src.as_u32x16();
15350 let r: u32x16 = vcvttps2udq(a, src, mask:k, SAE);
15351 transmute(src:r)
15352}
15353
15354/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15355/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15356///
15357/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundps_epu32&expand=1924)
15358#[inline]
15359#[target_feature(enable = "avx512f")]
15360#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15361#[cfg_attr(test, assert_instr(vcvttps2udq, SAE = 8))]
15362#[rustc_legacy_const_generics(2)]
15363pub unsafe fn _mm512_maskz_cvtt_roundps_epu32<const SAE: i32>(k: __mmask16, a: __m512) -> __m512i {
15364 static_assert_sae!(SAE);
15365 let a: f32x16 = a.as_f32x16();
15366 let zero: u32x16 = _mm512_setzero_si512().as_u32x16();
15367 let r: u32x16 = vcvttps2udq(a, src:zero, mask:k, SAE);
15368 transmute(src:r)
15369}
15370
15371/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.\
15372/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15373///
15374/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundpd_epi32&expand=1904)
15375#[inline]
15376#[target_feature(enable = "avx512f")]
15377#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15378#[cfg_attr(test, assert_instr(vcvttpd2dq, SAE = 8))]
15379#[rustc_legacy_const_generics(1)]
15380pub unsafe fn _mm512_cvtt_roundpd_epi32<const SAE: i32>(a: __m512d) -> __m256i {
15381 static_assert_sae!(SAE);
15382 let a: f64x8 = a.as_f64x8();
15383 let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
15384 let r: i32x8 = vcvttpd2dq(a, src:zero, mask:0b11111111, SAE);
15385 transmute(src:r)
15386}
15387
15388/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15389/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15390///
15391/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundpd_epi32&expand=1905)
15392#[inline]
15393#[target_feature(enable = "avx512f")]
15394#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15395#[cfg_attr(test, assert_instr(vcvttpd2dq, SAE = 8))]
15396#[rustc_legacy_const_generics(3)]
15397pub unsafe fn _mm512_mask_cvtt_roundpd_epi32<const SAE: i32>(
15398 src: __m256i,
15399 k: __mmask8,
15400 a: __m512d,
15401) -> __m256i {
15402 static_assert_sae!(SAE);
15403 let a: f64x8 = a.as_f64x8();
15404 let src: i32x8 = src.as_i32x8();
15405 let r: i32x8 = vcvttpd2dq(a, src, mask:k, SAE);
15406 transmute(src:r)
15407}
15408
/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundpd_epi32&expand=1906)
15413#[inline]
15414#[target_feature(enable = "avx512f")]
15415#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15416#[cfg_attr(test, assert_instr(vcvttpd2dq, SAE = 8))]
15417#[rustc_legacy_const_generics(2)]
15418pub unsafe fn _mm512_maskz_cvtt_roundpd_epi32<const SAE: i32>(k: __mmask8, a: __m512d) -> __m256i {
15419 static_assert_sae!(SAE);
15420 let a: f64x8 = a.as_f64x8();
15421 let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
15422 let r: i32x8 = vcvttpd2dq(a, src:zero, mask:k, SAE);
15423 transmute(src:r)
15424}
15425
15426/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.\
15427/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15428///
15429/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundpd_epu32&expand=1910)
15430#[inline]
15431#[target_feature(enable = "avx512f")]
15432#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15433#[cfg_attr(test, assert_instr(vcvttpd2udq, SAE = 8))]
15434#[rustc_legacy_const_generics(1)]
15435pub unsafe fn _mm512_cvtt_roundpd_epu32<const SAE: i32>(a: __m512d) -> __m256i {
15436 static_assert_sae!(SAE);
15437 let a: f64x8 = a.as_f64x8();
15438 let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
15439 let r: u32x8 = vcvttpd2udq(a, src:zero, mask:0b11111111, SAE);
15440 transmute(src:r)
15441}
15442
15443/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15444/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15445///
15446/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundpd_epu32&expand=1911)
15447#[inline]
15448#[target_feature(enable = "avx512f")]
15449#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15450#[cfg_attr(test, assert_instr(vcvttpd2udq, SAE = 8))]
15451#[rustc_legacy_const_generics(3)]
15452pub unsafe fn _mm512_mask_cvtt_roundpd_epu32<const SAE: i32>(
15453 src: __m256i,
15454 k: __mmask8,
15455 a: __m512d,
15456) -> __m256i {
15457 static_assert_sae!(SAE);
15458 let a: f64x8 = a.as_f64x8();
15459 let src: i32x8 = src.as_i32x8();
15460 let r: u32x8 = vcvttpd2udq(a, src, mask:k, SAE);
15461 transmute(src:r)
15462}
15463
15464/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.
15465///
15466/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttps_epi32&expand=1984)
15467#[inline]
15468#[target_feature(enable = "avx512f")]
15469#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15470#[cfg_attr(test, assert_instr(vcvttps2dq))]
15471pub unsafe fn _mm512_cvttps_epi32(a: __m512) -> __m512i {
15472 transmute(src:vcvttps2dq(
15473 a:a.as_f32x16(),
15474 src:_mm512_setzero_si512().as_i32x16(),
15475 mask:0b11111111_11111111,
15476 _MM_FROUND_CUR_DIRECTION,
15477 ))
15478}
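
// Illustrative sketch (helper name and values are assumptions, not part of the
// crate): truncation always rounds toward zero, regardless of the current
// rounding mode, so 2.9 becomes 2 and -2.9 would become -2.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_cvttps_epi32() -> __m512i {
    let a = _mm512_set1_ps(2.9);
    // Every lane of the result holds 2 (not 3).
    _mm512_cvttps_epi32(a)
}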
15479
15480/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15481///
15482/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttps_epi32&expand=1985)
15483#[inline]
15484#[target_feature(enable = "avx512f")]
15485#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15486#[cfg_attr(test, assert_instr(vcvttps2dq))]
15487pub unsafe fn _mm512_mask_cvttps_epi32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
15488 transmute(src:vcvttps2dq(
15489 a:a.as_f32x16(),
15490 src:src.as_i32x16(),
15491 mask:k,
15492 _MM_FROUND_CUR_DIRECTION,
15493 ))
15494}
15495
15496/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15497///
15498/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttps_epi32&expand=1986)
15499#[inline]
15500#[target_feature(enable = "avx512f")]
15501#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15502#[cfg_attr(test, assert_instr(vcvttps2dq))]
15503pub unsafe fn _mm512_maskz_cvttps_epi32(k: __mmask16, a: __m512) -> __m512i {
15504 transmute(src:vcvttps2dq(
15505 a:a.as_f32x16(),
15506 src:_mm512_setzero_si512().as_i32x16(),
15507 mask:k,
15508 _MM_FROUND_CUR_DIRECTION,
15509 ))
15510}
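
// Illustrative sketch (helper name and values are assumptions): the same
// conversion under a writemask keeps `src` in unselected lanes, while the
// zeromask variant clears them.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_masked_cvttps_epi32() -> (__m512i, __m512i) {
    let a = _mm512_set1_ps(5.5);
    let src = _mm512_set1_epi32(-1);
    let k: __mmask16 = 0b00000000_11111111;
    // Lanes 0..8: 5; lanes 8..16: -1 (copied from `src`).
    let merged = _mm512_mask_cvttps_epi32(src, k, a);
    // Lanes 0..8: 5; lanes 8..16: 0 (zeroed out).
    let zeroed = _mm512_maskz_cvttps_epi32(k, a);
    (merged, zeroed)
}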
15511
15512/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15513///
15514/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttps_epi32&expand=1982)
15515#[inline]
15516#[target_feature(enable = "avx512f,avx512vl")]
15517#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15518#[cfg_attr(test, assert_instr(vcvttps2dq))]
15519pub unsafe fn _mm256_mask_cvttps_epi32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
15520 transmute(src:vcvttps2dq256(a:a.as_f32x8(), src:src.as_i32x8(), mask:k))
15521}
15522
15523/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15524///
15525/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttps_epi32&expand=1983)
15526#[inline]
15527#[target_feature(enable = "avx512f,avx512vl")]
15528#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15529#[cfg_attr(test, assert_instr(vcvttps2dq))]
15530pub unsafe fn _mm256_maskz_cvttps_epi32(k: __mmask8, a: __m256) -> __m256i {
15531 transmute(src:vcvttps2dq256(
15532 a:a.as_f32x8(),
15533 src:_mm256_setzero_si256().as_i32x8(),
15534 mask:k,
15535 ))
15536}
15537
15538/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15539///
15540/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttps_epi32&expand=1979)
15541#[inline]
15542#[target_feature(enable = "avx512f,avx512vl")]
15543#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15544#[cfg_attr(test, assert_instr(vcvttps2dq))]
15545pub unsafe fn _mm_mask_cvttps_epi32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
15546 transmute(src:vcvttps2dq128(a:a.as_f32x4(), src:src.as_i32x4(), mask:k))
15547}
15548
15549/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15550///
15551/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttps_epi32&expand=1980)
15552#[inline]
15553#[target_feature(enable = "avx512f,avx512vl")]
15554#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15555#[cfg_attr(test, assert_instr(vcvttps2dq))]
15556pub unsafe fn _mm_maskz_cvttps_epi32(k: __mmask8, a: __m128) -> __m128i {
15557 transmute(src:vcvttps2dq128(
15558 a:a.as_f32x4(),
15559 src:_mm_setzero_si128().as_i32x4(),
15560 mask:k,
15561 ))
15562}
15563
15564/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
15565///
15566/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttps_epu32&expand=2002)
15567#[inline]
15568#[target_feature(enable = "avx512f")]
15569#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15570#[cfg_attr(test, assert_instr(vcvttps2udq))]
15571pub unsafe fn _mm512_cvttps_epu32(a: __m512) -> __m512i {
15572 transmute(src:vcvttps2udq(
15573 a:a.as_f32x16(),
15574 src:_mm512_setzero_si512().as_u32x16(),
15575 mask:0b11111111_11111111,
15576 _MM_FROUND_CUR_DIRECTION,
15577 ))
15578}
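
// Illustrative sketch (helper name and value are assumptions): the unsigned
// truncating conversion is only meaningful for non-negative inputs that fit in
// a u32; 4_000_000_000.0 is exactly representable as an f32 and exceeds i32::MAX.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_cvttps_epu32() -> __m512i {
    let a = _mm512_set1_ps(4_000_000_000.0);
    // Each lane holds the unsigned value 4_000_000_000 (stored in the __m512i bits).
    _mm512_cvttps_epu32(a)
}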
15579
15580/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15581///
15582/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttps_epu32&expand=2003)
15583#[inline]
15584#[target_feature(enable = "avx512f")]
15585#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15586#[cfg_attr(test, assert_instr(vcvttps2udq))]
15587pub unsafe fn _mm512_mask_cvttps_epu32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
15588 transmute(src:vcvttps2udq(
15589 a:a.as_f32x16(),
15590 src:src.as_u32x16(),
15591 mask:k,
15592 _MM_FROUND_CUR_DIRECTION,
15593 ))
15594}
15595
15596/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15597///
15598/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttps_epu32&expand=2004)
15599#[inline]
15600#[target_feature(enable = "avx512f")]
15601#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15602#[cfg_attr(test, assert_instr(vcvttps2udq))]
15603pub unsafe fn _mm512_maskz_cvttps_epu32(k: __mmask16, a: __m512) -> __m512i {
15604 transmute(src:vcvttps2udq(
15605 a:a.as_f32x16(),
15606 src:_mm512_setzero_si512().as_u32x16(),
15607 mask:k,
15608 _MM_FROUND_CUR_DIRECTION,
15609 ))
15610}
15611
15612/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
15613///
15614/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttps_epu32&expand=1999)
15615#[inline]
15616#[target_feature(enable = "avx512f,avx512vl")]
15617#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15618#[cfg_attr(test, assert_instr(vcvttps2udq))]
15619pub unsafe fn _mm256_cvttps_epu32(a: __m256) -> __m256i {
15620 transmute(src:vcvttps2udq256(
15621 a:a.as_f32x8(),
15622 src:_mm256_setzero_si256().as_u32x8(),
15623 mask:0b11111111,
15624 ))
15625}
15626
15627/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15628///
15629/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttps_epu32&expand=2000)
15630#[inline]
15631#[target_feature(enable = "avx512f,avx512vl")]
15632#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15633#[cfg_attr(test, assert_instr(vcvttps2udq))]
15634pub unsafe fn _mm256_mask_cvttps_epu32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
15635 transmute(src:vcvttps2udq256(a:a.as_f32x8(), src:src.as_u32x8(), mask:k))
15636}
15637
15638/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15639///
15640/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttps_epu32&expand=2001)
15641#[inline]
15642#[target_feature(enable = "avx512f,avx512vl")]
15643#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15644#[cfg_attr(test, assert_instr(vcvttps2udq))]
15645pub unsafe fn _mm256_maskz_cvttps_epu32(k: __mmask8, a: __m256) -> __m256i {
15646 transmute(src:vcvttps2udq256(
15647 a:a.as_f32x8(),
15648 src:_mm256_setzero_si256().as_u32x8(),
15649 mask:k,
15650 ))
15651}
15652
15653/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
15654///
15655/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttps_epu32&expand=1996)
15656#[inline]
15657#[target_feature(enable = "avx512f,avx512vl")]
15658#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15659#[cfg_attr(test, assert_instr(vcvttps2udq))]
15660pub unsafe fn _mm_cvttps_epu32(a: __m128) -> __m128i {
15661 transmute(src:vcvttps2udq128(
15662 a:a.as_f32x4(),
15663 src:_mm_setzero_si128().as_u32x4(),
15664 mask:0b11111111,
15665 ))
15666}
15667
15668/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15669///
15670/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttps_epu32&expand=1997)
15671#[inline]
15672#[target_feature(enable = "avx512f,avx512vl")]
15673#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15674#[cfg_attr(test, assert_instr(vcvttps2udq))]
15675pub unsafe fn _mm_mask_cvttps_epu32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
15676 transmute(src:vcvttps2udq128(a:a.as_f32x4(), src:src.as_u32x4(), mask:k))
15677}
15678
15679/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15680///
15681/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttps_epu32&expand=1998)
15682#[inline]
15683#[target_feature(enable = "avx512f,avx512vl")]
15684#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15685#[cfg_attr(test, assert_instr(vcvttps2udq))]
15686pub unsafe fn _mm_maskz_cvttps_epu32(k: __mmask8, a: __m128) -> __m128i {
15687 transmute(src:vcvttps2udq128(
15688 a:a.as_f32x4(),
15689 src:_mm_setzero_si128().as_u32x4(),
15690 mask:k,
15691 ))
15692}
15693
15694/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15695/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15696///
15697/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundpd_epu32&expand=1912)
15698#[inline]
15699#[target_feature(enable = "avx512f")]
15700#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15701#[cfg_attr(test, assert_instr(vcvttpd2udq, SAE = 8))]
15702#[rustc_legacy_const_generics(2)]
15703pub unsafe fn _mm512_maskz_cvtt_roundpd_epu32<const SAE: i32>(k: __mmask8, a: __m512d) -> __m256i {
15704 static_assert_sae!(SAE);
15705 let a: f64x8 = a.as_f64x8();
15706 let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
15707 let r: u32x8 = vcvttpd2udq(a, src:zero, mask:k, SAE);
15708 transmute(src:r)
15709}
15710
15711/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.
15712///
15713/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttpd_epi32&expand=1947)
15714#[inline]
15715#[target_feature(enable = "avx512f")]
15716#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15717#[cfg_attr(test, assert_instr(vcvttpd2dq))]
15718pub unsafe fn _mm512_cvttpd_epi32(a: __m512d) -> __m256i {
15719 transmute(src:vcvttpd2dq(
15720 a:a.as_f64x8(),
15721 src:_mm256_setzero_si256().as_i32x8(),
15722 mask:0b11111111,
15723 _MM_FROUND_CUR_DIRECTION,
15724 ))
15725}
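
// Illustrative sketch (helper name and values are assumptions): eight doubles
// narrow to eight 32-bit integers, so the result is a 256-bit vector.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_cvttpd_epi32() -> __m256i {
    let a = _mm512_setr_pd(0.9, 1.9, 2.9, 3.9, 4.9, 5.9, 6.9, 7.9);
    // Result lanes are 0, 1, 2, 3, 4, 5, 6, 7 (fractions discarded).
    _mm512_cvttpd_epi32(a)
}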
15726
15727/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15728///
15729/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttpd_epi32&expand=1948)
15730#[inline]
15731#[target_feature(enable = "avx512f")]
15732#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15733#[cfg_attr(test, assert_instr(vcvttpd2dq))]
15734pub unsafe fn _mm512_mask_cvttpd_epi32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
15735 transmute(src:vcvttpd2dq(
15736 a:a.as_f64x8(),
15737 src:src.as_i32x8(),
15738 mask:k,
15739 _MM_FROUND_CUR_DIRECTION,
15740 ))
15741}
15742
15743/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15744///
15745/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttpd_epi32&expand=1949)
15746#[inline]
15747#[target_feature(enable = "avx512f")]
15748#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15749#[cfg_attr(test, assert_instr(vcvttpd2dq))]
15750pub unsafe fn _mm512_maskz_cvttpd_epi32(k: __mmask8, a: __m512d) -> __m256i {
15751 transmute(src:vcvttpd2dq(
15752 a:a.as_f64x8(),
15753 src:_mm256_setzero_si256().as_i32x8(),
15754 mask:k,
15755 _MM_FROUND_CUR_DIRECTION,
15756 ))
15757}
15758
15759/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15760///
15761/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttpd_epi32&expand=1945)
15762#[inline]
15763#[target_feature(enable = "avx512f,avx512vl")]
15764#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15765#[cfg_attr(test, assert_instr(vcvttpd2dq))]
15766pub unsafe fn _mm256_mask_cvttpd_epi32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
15767 transmute(src:vcvttpd2dq256(a:a.as_f64x4(), src:src.as_i32x4(), mask:k))
15768}
15769
15770/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15771///
15772/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttpd_epi32&expand=1946)
15773#[inline]
15774#[target_feature(enable = "avx512f,avx512vl")]
15775#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15776#[cfg_attr(test, assert_instr(vcvttpd2dq))]
15777pub unsafe fn _mm256_maskz_cvttpd_epi32(k: __mmask8, a: __m256d) -> __m128i {
15778 transmute(src:vcvttpd2dq256(
15779 a:a.as_f64x4(),
15780 src:_mm_setzero_si128().as_i32x4(),
15781 mask:k,
15782 ))
15783}
15784
15785/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15786///
15787/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttpd_epi32&expand=1942)
15788#[inline]
15789#[target_feature(enable = "avx512f,avx512vl")]
15790#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15791#[cfg_attr(test, assert_instr(vcvttpd2dq))]
15792pub unsafe fn _mm_mask_cvttpd_epi32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
15793 transmute(src:vcvttpd2dq128(a:a.as_f64x2(), src:src.as_i32x4(), mask:k))
15794}
15795
15796/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15797///
15798/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttpd_epi32&expand=1943)
15799#[inline]
15800#[target_feature(enable = "avx512f,avx512vl")]
15801#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15802#[cfg_attr(test, assert_instr(vcvttpd2dq))]
15803pub unsafe fn _mm_maskz_cvttpd_epi32(k: __mmask8, a: __m128d) -> __m128i {
15804 transmute(src:vcvttpd2dq128(
15805 a:a.as_f64x2(),
15806 src:_mm_setzero_si128().as_i32x4(),
15807 mask:k,
15808 ))
15809}
15810
15811/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
15812///
15813/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttpd_epu32&expand=1965)
15814#[inline]
15815#[target_feature(enable = "avx512f")]
15816#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15817#[cfg_attr(test, assert_instr(vcvttpd2udq))]
15818pub unsafe fn _mm512_cvttpd_epu32(a: __m512d) -> __m256i {
15819 transmute(src:vcvttpd2udq(
15820 a:a.as_f64x8(),
15821 src:_mm256_setzero_si256().as_i32x8(),
15822 mask:0b11111111,
15823 _MM_FROUND_CUR_DIRECTION,
15824 ))
15825}
15826
15827/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15828///
15829/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttpd_epu32&expand=1966)
15830#[inline]
15831#[target_feature(enable = "avx512f")]
15832#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15833#[cfg_attr(test, assert_instr(vcvttpd2udq))]
15834pub unsafe fn _mm512_mask_cvttpd_epu32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
15835 transmute(src:vcvttpd2udq(
15836 a:a.as_f64x8(),
15837 src:src.as_i32x8(),
15838 mask:k,
15839 _MM_FROUND_CUR_DIRECTION,
15840 ))
15841}
15842
15843/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15844///
15845/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttpd_epu32&expand=1967)
15846#[inline]
15847#[target_feature(enable = "avx512f")]
15848#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15849#[cfg_attr(test, assert_instr(vcvttpd2udq))]
15850pub unsafe fn _mm512_maskz_cvttpd_epu32(k: __mmask8, a: __m512d) -> __m256i {
15851 transmute(src:vcvttpd2udq(
15852 a:a.as_f64x8(),
15853 src:_mm256_setzero_si256().as_i32x8(),
15854 mask:k,
15855 _MM_FROUND_CUR_DIRECTION,
15856 ))
15857}
15858
15859/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
15860///
15861/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttpd_epu32&expand=1962)
15862#[inline]
15863#[target_feature(enable = "avx512f,avx512vl")]
15864#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15865#[cfg_attr(test, assert_instr(vcvttpd2udq))]
15866pub unsafe fn _mm256_cvttpd_epu32(a: __m256d) -> __m128i {
15867 transmute(src:vcvttpd2udq256(
15868 a:a.as_f64x4(),
15869 src:_mm_setzero_si128().as_i32x4(),
15870 mask:0b11111111,
15871 ))
15872}
15873
15874/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15875///
15876/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttpd_epu32&expand=1963)
15877#[inline]
15878#[target_feature(enable = "avx512f,avx512vl")]
15879#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15880#[cfg_attr(test, assert_instr(vcvttpd2udq))]
15881pub unsafe fn _mm256_mask_cvttpd_epu32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
15882 transmute(src:vcvttpd2udq256(a:a.as_f64x4(), src:src.as_i32x4(), mask:k))
15883}
15884
15885/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15886///
15887/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttpd_epu32&expand=1964)
15888#[inline]
15889#[target_feature(enable = "avx512f,avx512vl")]
15890#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15891#[cfg_attr(test, assert_instr(vcvttpd2udq))]
15892pub unsafe fn _mm256_maskz_cvttpd_epu32(k: __mmask8, a: __m256d) -> __m128i {
15893 transmute(src:vcvttpd2udq256(
15894 a:a.as_f64x4(),
15895 src:_mm_setzero_si128().as_i32x4(),
15896 mask:k,
15897 ))
15898}
15899
15900/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
15901///
15902/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttpd_epu32&expand=1959)
15903#[inline]
15904#[target_feature(enable = "avx512f,avx512vl")]
15905#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15906#[cfg_attr(test, assert_instr(vcvttpd2udq))]
15907pub unsafe fn _mm_cvttpd_epu32(a: __m128d) -> __m128i {
15908 transmute(src:vcvttpd2udq128(
15909 a:a.as_f64x2(),
15910 src:_mm_setzero_si128().as_i32x4(),
15911 mask:0b11111111,
15912 ))
15913}
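
// Illustrative sketch (helper name and values are assumptions): a __m128d holds
// only two doubles, so just the two low 32-bit lanes of the result carry
// converted values; the upper lanes are zeroed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn example_cvttpd_epu32_128() -> __m128i {
    let a = _mm_set_pd(7.7, 3.3); // lane 0 = 3.3, lane 1 = 7.7
    // Result lanes: [3, 7, 0, 0]
    _mm_cvttpd_epu32(a)
}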
15914
15915/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15916///
15917/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttpd_epu32&expand=1960)
15918#[inline]
15919#[target_feature(enable = "avx512f,avx512vl")]
15920#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15921#[cfg_attr(test, assert_instr(vcvttpd2udq))]
15922pub unsafe fn _mm_mask_cvttpd_epu32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
15923 transmute(src:vcvttpd2udq128(a:a.as_f64x2(), src:src.as_i32x4(), mask:k))
15924}
15925
15926/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15927///
15928/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttpd_epu32&expand=1961)
15929#[inline]
15930#[target_feature(enable = "avx512f,avx512vl")]
15931#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15932#[cfg_attr(test, assert_instr(vcvttpd2udq))]
15933pub unsafe fn _mm_maskz_cvttpd_epu32(k: __mmask8, a: __m128d) -> __m128i {
15934 transmute(src:vcvttpd2udq128(
15935 a:a.as_f64x2(),
15936 src:_mm_setzero_si128().as_i32x4(),
15937 mask:k,
15938 ))
15939}
15940
15941/// Returns vector of type `__m512d` with all elements set to zero.
15942///
15943/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_pd&expand=5018)
15944#[inline]
15945#[target_feature(enable = "avx512f")]
15946#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15947#[cfg_attr(test, assert_instr(vxorps))]
15948pub unsafe fn _mm512_setzero_pd() -> __m512d {
15949 // All-0 is a properly initialized __m512d
15950 mem::zeroed()
15951}
15952
15953/// Returns vector of type `__m512` with all elements set to zero.
15954///
15955/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_ps&expand=5021)
15956#[inline]
15957#[target_feature(enable = "avx512f")]
15958#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15959#[cfg_attr(test, assert_instr(vxorps))]
15960pub unsafe fn _mm512_setzero_ps() -> __m512 {
15961 // All-0 is a properly initialized __m512
15962 mem::zeroed()
15963}
15964
15965/// Returns vector of type `__m512` with all elements set to zero.
15966///
15967/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero&expand=5014)
15968#[inline]
15969#[target_feature(enable = "avx512f")]
15970#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15971#[cfg_attr(test, assert_instr(vxorps))]
15972pub unsafe fn _mm512_setzero() -> __m512 {
15973 // All-0 is a properly initialized __m512
15974 mem::zeroed()
15975}
15976
15977/// Returns vector of type `__m512i` with all elements set to zero.
15978///
15979/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_si512&expand=5024)
15980#[inline]
15981#[target_feature(enable = "avx512f")]
15982#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15983#[cfg_attr(test, assert_instr(vxorps))]
15984pub unsafe fn _mm512_setzero_si512() -> __m512i {
15985 // All-0 is a properly initialized __m512i
15986 mem::zeroed()
15987}
15988
15989/// Returns vector of type `__m512i` with all elements set to zero.
15990///
15991/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_epi32&expand=5015)
15992#[inline]
15993#[target_feature(enable = "avx512f")]
15994#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15995#[cfg_attr(test, assert_instr(vxorps))]
15996pub unsafe fn _mm512_setzero_epi32() -> __m512i {
15997 // All-0 is a properly initialized __m512i
15998 mem::zeroed()
15999}
16000
16001/// Sets packed 32-bit integers in `dst` with the supplied values in reverse
16002/// order.
16003///
16004/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_epi32&expand=4991)
16005#[inline]
16006#[target_feature(enable = "avx512f")]
16007#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16008pub unsafe fn _mm512_setr_epi32(
16009 e15: i32,
16010 e14: i32,
16011 e13: i32,
16012 e12: i32,
16013 e11: i32,
16014 e10: i32,
16015 e9: i32,
16016 e8: i32,
16017 e7: i32,
16018 e6: i32,
16019 e5: i32,
16020 e4: i32,
16021 e3: i32,
16022 e2: i32,
16023 e1: i32,
16024 e0: i32,
16025) -> __m512i {
16026 let r: i32x16 = i32x16::new(
16027 x0:e15, x1:e14, x2:e13, x3:e12, x4:e11, x5:e10, x6:e9, x7:e8, x8:e7, x9:e6, x10:e5, x11:e4, x12:e3, x13:e2, x14:e1, x15:e0,
16028 );
16029 transmute(src:r)
16030}
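
// Illustrative sketch (helper name is an assumption): `setr` takes its
// arguments in memory order, so the first argument lands in lane 0, whereas
// `set` places its first argument in the highest lane.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_setr_epi32() -> __m512i {
    // Lane 0 = 0, lane 1 = 1, ..., lane 15 = 15.
    _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)
}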
16031
16032/// Set packed 8-bit integers in dst with the supplied values.
16033///
16034/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi8&expand=4915)
16035#[inline]
16036#[target_feature(enable = "avx512f")]
16037#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16038pub unsafe fn _mm512_set_epi8(
16039 e63: i8,
16040 e62: i8,
16041 e61: i8,
16042 e60: i8,
16043 e59: i8,
16044 e58: i8,
16045 e57: i8,
16046 e56: i8,
16047 e55: i8,
16048 e54: i8,
16049 e53: i8,
16050 e52: i8,
16051 e51: i8,
16052 e50: i8,
16053 e49: i8,
16054 e48: i8,
16055 e47: i8,
16056 e46: i8,
16057 e45: i8,
16058 e44: i8,
16059 e43: i8,
16060 e42: i8,
16061 e41: i8,
16062 e40: i8,
16063 e39: i8,
16064 e38: i8,
16065 e37: i8,
16066 e36: i8,
16067 e35: i8,
16068 e34: i8,
16069 e33: i8,
16070 e32: i8,
16071 e31: i8,
16072 e30: i8,
16073 e29: i8,
16074 e28: i8,
16075 e27: i8,
16076 e26: i8,
16077 e25: i8,
16078 e24: i8,
16079 e23: i8,
16080 e22: i8,
16081 e21: i8,
16082 e20: i8,
16083 e19: i8,
16084 e18: i8,
16085 e17: i8,
16086 e16: i8,
16087 e15: i8,
16088 e14: i8,
16089 e13: i8,
16090 e12: i8,
16091 e11: i8,
16092 e10: i8,
16093 e9: i8,
16094 e8: i8,
16095 e7: i8,
16096 e6: i8,
16097 e5: i8,
16098 e4: i8,
16099 e3: i8,
16100 e2: i8,
16101 e1: i8,
16102 e0: i8,
16103) -> __m512i {
16104 let r: i8x64 = i8x64::new(
16105 x0:e0, x1:e1, x2:e2, x3:e3, x4:e4, x5:e5, x6:e6, x7:e7, x8:e8, x9:e9, x10:e10, x11:e11, x12:e12, x13:e13, x14:e14, x15:e15, x16:e16, x17:e17, x18:e18, x19:e19,
16106 x20:e20, x21:e21, x22:e22, x23:e23, x24:e24, x25:e25, x26:e26, x27:e27, x28:e28, x29:e29, x30:e30, x31:e31, x32:e32, x33:e33, x34:e34, x35:e35, x36:e36, x37:e37,
16107 x38:e38, x39:e39, x40:e40, x41:e41, x42:e42, x43:e43, x44:e44, x45:e45, x46:e46, x47:e47, x48:e48, x49:e49, x50:e50, x51:e51, x52:e52, x53:e53, x54:e54, x55:e55,
16108 x56:e56, x57:e57, x58:e58, x59:e59, x60:e60, x61:e61, x62:e62, x63:e63,
16109 );
16110 transmute(src:r)
16111}
16112
16113/// Set packed 16-bit integers in dst with the supplied values.
16114///
16115/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi16&expand=4905)
16116#[inline]
16117#[target_feature(enable = "avx512f")]
16118#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16119pub unsafe fn _mm512_set_epi16(
16120 e31: i16,
16121 e30: i16,
16122 e29: i16,
16123 e28: i16,
16124 e27: i16,
16125 e26: i16,
16126 e25: i16,
16127 e24: i16,
16128 e23: i16,
16129 e22: i16,
16130 e21: i16,
16131 e20: i16,
16132 e19: i16,
16133 e18: i16,
16134 e17: i16,
16135 e16: i16,
16136 e15: i16,
16137 e14: i16,
16138 e13: i16,
16139 e12: i16,
16140 e11: i16,
16141 e10: i16,
16142 e9: i16,
16143 e8: i16,
16144 e7: i16,
16145 e6: i16,
16146 e5: i16,
16147 e4: i16,
16148 e3: i16,
16149 e2: i16,
16150 e1: i16,
16151 e0: i16,
16152) -> __m512i {
16153 let r: i16x32 = i16x32::new(
16154 x0:e0, x1:e1, x2:e2, x3:e3, x4:e4, x5:e5, x6:e6, x7:e7, x8:e8, x9:e9, x10:e10, x11:e11, x12:e12, x13:e13, x14:e14, x15:e15, x16:e16, x17:e17, x18:e18, x19:e19,
16155 x20:e20, x21:e21, x22:e22, x23:e23, x24:e24, x25:e25, x26:e26, x27:e27, x28:e28, x29:e29, x30:e30, x31:e31,
16156 );
16157 transmute(src:r)
16158}
16159
16160/// Set packed 32-bit integers in dst with the repeated 4 element sequence.
16161///
16162/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_epi32&expand=4982)
16163#[inline]
16164#[target_feature(enable = "avx512f")]
16165#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16166pub unsafe fn _mm512_set4_epi32(d: i32, c: i32, b: i32, a: i32) -> __m512i {
16167 _mm512_set_epi32(e15:d, e14:c, e13:b, e12:a, e11:d, e10:c, e9:b, e8:a, e7:d, e6:c, e5:b, e4:a, e3:d, e2:c, e1:b, e0:a)
16168}
16169
16170/// Set packed single-precision (32-bit) floating-point elements in dst with the repeated 4 element sequence.
16171///
16172/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_ps&expand=4985)
16173#[inline]
16174#[target_feature(enable = "avx512f")]
16175#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16176pub unsafe fn _mm512_set4_ps(d: f32, c: f32, b: f32, a: f32) -> __m512 {
16177 _mm512_set_ps(e0:d, e1:c, e2:b, e3:a, e4:d, e5:c, e6:b, e7:a, e8:d, e9:c, e10:b, e11:a, e12:d, e13:c, e14:b, e15:a)
16178}
16179
16180/// Set packed double-precision (64-bit) floating-point elements in dst with the repeated 4 element sequence.
16181///
16182/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_pd&expand=4984)
16183#[inline]
16184#[target_feature(enable = "avx512f")]
16185#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16186pub unsafe fn _mm512_set4_pd(d: f64, c: f64, b: f64, a: f64) -> __m512d {
16187 _mm512_set_pd(e0:d, e1:c, e2:b, e3:a, e4:d, e5:c, e6:b, e7:a)
16188}
16189
16190/// Set packed 32-bit integers in dst with the repeated 4 element sequence in reverse order.
16191///
16192/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_epi32&expand=5009)
16193#[inline]
16194#[target_feature(enable = "avx512f")]
16195#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16196pub unsafe fn _mm512_setr4_epi32(d: i32, c: i32, b: i32, a: i32) -> __m512i {
16197 _mm512_set_epi32(e15:a, e14:b, e13:c, e12:d, e11:a, e10:b, e9:c, e8:d, e7:a, e6:b, e5:c, e4:d, e3:a, e2:b, e1:c, e0:d)
16198}
16199
16200/// Set packed single-precision (32-bit) floating-point elements in dst with the repeated 4 element sequence in reverse order.
16201///
16202/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_ps&expand=5012)
16203#[inline]
16204#[target_feature(enable = "avx512f")]
16205#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16206pub unsafe fn _mm512_setr4_ps(d: f32, c: f32, b: f32, a: f32) -> __m512 {
16207 _mm512_set_ps(e0:a, e1:b, e2:c, e3:d, e4:a, e5:b, e6:c, e7:d, e8:a, e9:b, e10:c, e11:d, e12:a, e13:b, e14:c, e15:d)
16208}
16209
16210/// Set packed double-precision (64-bit) floating-point elements in dst with the repeated 4 element sequence in reverse order.
16211///
16212/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_pd&expand=5011)
16213#[inline]
16214#[target_feature(enable = "avx512f")]
16215#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16216pub unsafe fn _mm512_setr4_pd(d: f64, c: f64, b: f64, a: f64) -> __m512d {
16217 _mm512_set_pd(e0:a, e1:b, e2:c, e3:d, e4:a, e5:b, e6:c, e7:d)
16218}
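
// Illustrative sketch (helper name is an assumption): `set4` repeats its four
// arguments across the whole register, and `setr4` repeats the same pattern in
// reverse order.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_set4_epi32() -> (__m512i, __m512i) {
    // Lanes (low to high): 1, 2, 3, 4, 1, 2, 3, 4, ...
    let fwd = _mm512_set4_epi32(4, 3, 2, 1);
    // Lanes (low to high): 4, 3, 2, 1, 4, 3, 2, 1, ...
    let rev = _mm512_setr4_epi32(4, 3, 2, 1);
    (fwd, rev)
}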
16219
16220/// Set packed 64-bit integers in dst with the supplied values.
16221///
16222/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi64&expand=4910)
16223#[inline]
16224#[target_feature(enable = "avx512f")]
16225#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16226pub unsafe fn _mm512_set_epi64(
16227 e0: i64,
16228 e1: i64,
16229 e2: i64,
16230 e3: i64,
16231 e4: i64,
16232 e5: i64,
16233 e6: i64,
16234 e7: i64,
16235) -> __m512i {
16236 _mm512_setr_epi64(e0:e7, e1:e6, e2:e5, e3:e4, e4:e3, e5:e2, e6:e1, e7:e0)
16237}
16238
16239/// Set packed 64-bit integers in dst with the supplied values in reverse order.
16240///
16241/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_epi64&expand=4993)
16242#[inline]
16243#[target_feature(enable = "avx512f")]
16244#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16245pub unsafe fn _mm512_setr_epi64(
16246 e0: i64,
16247 e1: i64,
16248 e2: i64,
16249 e3: i64,
16250 e4: i64,
16251 e5: i64,
16252 e6: i64,
16253 e7: i64,
16254) -> __m512i {
16255 let r: i64x8 = i64x8::new(x0:e0, x1:e1, x2:e2, x3:e3, x4:e4, x5:e5, x6:e6, x7:e7);
16256 transmute(src:r)
16257}
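
// Illustrative sketch (helper name is an assumption): with eight 64-bit lanes,
// `set_epi64` takes the highest lane first and `setr_epi64` the lowest.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_set_epi64() -> __m512i {
    // Lane 0 = 0, lane 1 = 1, ..., lane 7 = 7.
    _mm512_set_epi64(7, 6, 5, 4, 3, 2, 1, 0)
}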
16258
16259/// Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16260///
16261/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_pd&expand=3002)
16262#[inline]
16263#[target_feature(enable = "avx512f")]
16264#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16265#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
16266#[rustc_legacy_const_generics(2)]
16267pub unsafe fn _mm512_i32gather_pd<const SCALE: i32>(offsets: __m256i, slice: *const u8) -> __m512d {
16268 static_assert_imm8_scale!(SCALE);
16269 let zero: f64x8 = _mm512_setzero_pd().as_f64x8();
16270 let neg_one: i8 = -1;
16271 let slice: *const i8 = slice as *const i8;
16272 let offsets: i32x8 = offsets.as_i32x8();
16273 let r: f64x8 = vgatherdpd(src:zero, slice, offsets, mask:neg_one, SCALE);
16274 transmute(src:r)
16275}
16276
16277/// Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16278///
16279/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_pd&expand=3003)
16280#[inline]
16281#[target_feature(enable = "avx512f")]
16282#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16283#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
16284#[rustc_legacy_const_generics(4)]
16285pub unsafe fn _mm512_mask_i32gather_pd<const SCALE: i32>(
16286 src: __m512d,
16287 mask: __mmask8,
16288 offsets: __m256i,
16289 slice: *const u8,
16290) -> __m512d {
16291 static_assert_imm8_scale!(SCALE);
16292 let src: f64x8 = src.as_f64x8();
16293 let slice: *const i8 = slice as *const i8;
16294 let offsets: i32x8 = offsets.as_i32x8();
16295 let r: f64x8 = vgatherdpd(src, slice, offsets, mask as i8, SCALE);
16296 transmute(src:r)
16297}
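
// Illustrative sketch (helper name, data layout and indices are assumptions):
// gathers eight doubles from an in-memory array. SCALE = 8 because the indices
// count f64 elements, and the base pointer is passed as *const u8.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_i32gather_pd(data: &[f64; 16]) -> __m512d {
    // Pick elements 0, 2, 4, ..., 14 of `data`.
    let idx = _mm256_setr_epi32(0, 2, 4, 6, 8, 10, 12, 14);
    _mm512_i32gather_pd::<8>(idx, data.as_ptr() as *const u8)
}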
16298
16299/// Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16300///
16301/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_pd&expand=3092)
16302#[inline]
16303#[target_feature(enable = "avx512f")]
16304#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16305#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
16306#[rustc_legacy_const_generics(2)]
16307pub unsafe fn _mm512_i64gather_pd<const SCALE: i32>(offsets: __m512i, slice: *const u8) -> __m512d {
16308 static_assert_imm8_scale!(SCALE);
16309 let zero: f64x8 = _mm512_setzero_pd().as_f64x8();
16310 let neg_one: i8 = -1;
16311 let slice: *const i8 = slice as *const i8;
16312 let offsets: i64x8 = offsets.as_i64x8();
16313 let r: f64x8 = vgatherqpd(src:zero, slice, offsets, mask:neg_one, SCALE);
16314 transmute(src:r)
16315}
16316
16317/// Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16318///
16319/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_pd&expand=3093)
16320#[inline]
16321#[target_feature(enable = "avx512f")]
16322#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16323#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
16324#[rustc_legacy_const_generics(4)]
16325pub unsafe fn _mm512_mask_i64gather_pd<const SCALE: i32>(
16326 src: __m512d,
16327 mask: __mmask8,
16328 offsets: __m512i,
16329 slice: *const u8,
16330) -> __m512d {
16331 static_assert_imm8_scale!(SCALE);
16332 let src: f64x8 = src.as_f64x8();
16333 let slice: *const i8 = slice as *const i8;
16334 let offsets: i64x8 = offsets.as_i64x8();
16335 let r: f64x8 = vgatherqpd(src, slice, offsets, mask as i8, SCALE);
16336 transmute(src:r)
16337}
16338
16339/// Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16340///
16341/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_ps&expand=3100)
16342#[inline]
16343#[target_feature(enable = "avx512f")]
16344#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16345#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
16346#[rustc_legacy_const_generics(2)]
16347pub unsafe fn _mm512_i64gather_ps<const SCALE: i32>(offsets: __m512i, slice: *const u8) -> __m256 {
16348 static_assert_imm8_scale!(SCALE);
16349 let zero: f32x8 = _mm256_setzero_ps().as_f32x8();
16350 let neg_one: i8 = -1;
16351 let slice: *const i8 = slice as *const i8;
16352 let offsets: i64x8 = offsets.as_i64x8();
16353 let r: f32x8 = vgatherqps(src:zero, slice, offsets, mask:neg_one, SCALE);
16354 transmute(src:r)
16355}
16356
16357/// Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16358///
16359/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_ps&expand=3101)
16360#[inline]
16361#[target_feature(enable = "avx512f")]
16362#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16363#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
16364#[rustc_legacy_const_generics(4)]
16365pub unsafe fn _mm512_mask_i64gather_ps<const SCALE: i32>(
16366 src: __m256,
16367 mask: __mmask8,
16368 offsets: __m512i,
16369 slice: *const u8,
16370) -> __m256 {
16371 static_assert_imm8_scale!(SCALE);
16372 let src: f32x8 = src.as_f32x8();
16373 let slice: *const i8 = slice as *const i8;
16374 let offsets: i64x8 = offsets.as_i64x8();
16375 let r: f32x8 = vgatherqps(src, slice, offsets, mask as i8, SCALE);
16376 transmute(src:r)
16377}
16378
16379/// Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16380///
16381/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_ps&expand=3010)
16382#[inline]
16383#[target_feature(enable = "avx512f")]
16384#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16385#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
16386#[rustc_legacy_const_generics(2)]
16387pub unsafe fn _mm512_i32gather_ps<const SCALE: i32>(offsets: __m512i, slice: *const u8) -> __m512 {
16388 static_assert_imm8_scale!(SCALE);
16389 let zero: f32x16 = _mm512_setzero_ps().as_f32x16();
16390 let neg_one: i16 = -1;
16391 let slice: *const i8 = slice as *const i8;
16392 let offsets: i32x16 = offsets.as_i32x16();
16393 let r: f32x16 = vgatherdps(src:zero, slice, offsets, mask:neg_one, SCALE);
16394 transmute(src:r)
16395}
16396
16397/// Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16398///
16399/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_ps&expand=3011)
16400#[inline]
16401#[target_feature(enable = "avx512f")]
16402#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16403#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
16404#[rustc_legacy_const_generics(4)]
16405pub unsafe fn _mm512_mask_i32gather_ps<const SCALE: i32>(
16406 src: __m512,
16407 mask: __mmask16,
16408 offsets: __m512i,
16409 slice: *const u8,
16410) -> __m512 {
16411 static_assert_imm8_scale!(SCALE);
16412 let src: f32x16 = src.as_f32x16();
16413 let slice: *const i8 = slice as *const i8;
16414 let offsets: i32x16 = offsets.as_i32x16();
16415 let r: f32x16 = vgatherdps(src, slice, offsets, mask as i16, SCALE);
16416 transmute(src:r)
16417}
16418
16419/// Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16420///
16421/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_epi32&expand=2986)
16422#[inline]
16423#[target_feature(enable = "avx512f")]
16424#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16425#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
16426#[rustc_legacy_const_generics(2)]
16427pub unsafe fn _mm512_i32gather_epi32<const SCALE: i32>(
16428 offsets: __m512i,
16429 slice: *const u8,
16430) -> __m512i {
16431 static_assert_imm8_scale!(SCALE);
16432 let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
16433 let neg_one: i16 = -1;
16434 let slice: *const i8 = slice as *const i8;
16435 let offsets: i32x16 = offsets.as_i32x16();
16436 let r: i32x16 = vpgatherdd(src:zero, slice, offsets, mask:neg_one, SCALE);
16437 transmute(src:r)
16438}
16439
16440/// Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16441///
16442/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_epi32&expand=2987)
16443#[inline]
16444#[target_feature(enable = "avx512f")]
16445#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16446#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
16447#[rustc_legacy_const_generics(4)]
16448pub unsafe fn _mm512_mask_i32gather_epi32<const SCALE: i32>(
16449 src: __m512i,
16450 mask: __mmask16,
16451 offsets: __m512i,
16452 slice: *const u8,
16453) -> __m512i {
16454 static_assert_imm8_scale!(SCALE);
16455 let src: i32x16 = src.as_i32x16();
16456 let mask: i16 = mask as i16;
16457 let slice: *const i8 = slice as *const i8;
16458 let offsets: i32x16 = offsets.as_i32x16();
16459 let r: i32x16 = vpgatherdd(src, slice, offsets, mask, SCALE);
16460 transmute(src:r)
16461}
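
// Illustrative sketch (helper name, data layout and indices are assumptions): a
// write-masked 32-bit gather. Disabled lanes keep the corresponding value from
// `src` and their memory locations are not read.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_mask_i32gather_epi32(data: &[i32; 32]) -> __m512i {
    let src = _mm512_set1_epi32(-1);
    let idx = _mm512_setr_epi32(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30);
    // Only the low eight lanes are gathered; the high eight stay at -1.
    _mm512_mask_i32gather_epi32::<4>(src, 0b00000000_11111111, idx, data.as_ptr() as *const u8)
}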
16462
16463/// Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16464///
16465/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_epi64&expand=2994)
16466#[inline]
16467#[target_feature(enable = "avx512f")]
16468#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16469#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
16470#[rustc_legacy_const_generics(2)]
16471pub unsafe fn _mm512_i32gather_epi64<const SCALE: i32>(
16472 offsets: __m256i,
16473 slice: *const u8,
16474) -> __m512i {
16475 static_assert_imm8_scale!(SCALE);
16476 let zero: i64x8 = _mm512_setzero_si512().as_i64x8();
16477 let neg_one: i8 = -1;
16478 let slice: *const i8 = slice as *const i8;
16479 let offsets: i32x8 = offsets.as_i32x8();
16480 let r: i64x8 = vpgatherdq(src:zero, slice, offsets, mask:neg_one, SCALE);
16481 transmute(src:r)
16482}
16483
16484/// Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16485///
16486/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_epi64&expand=2995)
16487#[inline]
16488#[target_feature(enable = "avx512f")]
16489#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16490#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
16491#[rustc_legacy_const_generics(4)]
16492pub unsafe fn _mm512_mask_i32gather_epi64<const SCALE: i32>(
16493 src: __m512i,
16494 mask: __mmask8,
16495 offsets: __m256i,
16496 slice: *const u8,
16497) -> __m512i {
16498 static_assert_imm8_scale!(SCALE);
16499 let src: i64x8 = src.as_i64x8();
16500 let mask: i8 = mask as i8;
16501 let slice: *const i8 = slice as *const i8;
16502 let offsets: i32x8 = offsets.as_i32x8();
16503 let r: i64x8 = vpgatherdq(src, slice, offsets, mask, SCALE);
16504 transmute(src:r)
16505}
16506
16507/// Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16508///
16509/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_epi64&expand=3084)
16510#[inline]
16511#[target_feature(enable = "avx512f")]
16512#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16513#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
16514#[rustc_legacy_const_generics(2)]
16515pub unsafe fn _mm512_i64gather_epi64<const SCALE: i32>(
16516 offsets: __m512i,
16517 slice: *const u8,
16518) -> __m512i {
16519 static_assert_imm8_scale!(SCALE);
16520 let zero: i64x8 = _mm512_setzero_si512().as_i64x8();
16521 let neg_one: i8 = -1;
16522 let slice: *const i8 = slice as *const i8;
16523 let offsets: i64x8 = offsets.as_i64x8();
16524 let r: i64x8 = vpgatherqq(src:zero, slice, offsets, mask:neg_one, SCALE);
16525 transmute(src:r)
16526}
16527
16528/// Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16529///
16530/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_epi64&expand=3085)
16531#[inline]
16532#[target_feature(enable = "avx512f")]
16533#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16534#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
16535#[rustc_legacy_const_generics(4)]
16536pub unsafe fn _mm512_mask_i64gather_epi64<const SCALE: i32>(
16537 src: __m512i,
16538 mask: __mmask8,
16539 offsets: __m512i,
16540 slice: *const u8,
16541) -> __m512i {
16542 static_assert_imm8_scale!(SCALE);
16543 let src: i64x8 = src.as_i64x8();
16544 let mask: i8 = mask as i8;
16545 let slice: *const i8 = slice as *const i8;
16546 let offsets: i64x8 = offsets.as_i64x8();
16547 let r: i64x8 = vpgatherqq(src, slice, offsets, mask, SCALE);
16548 transmute(src:r)
16549}
16550
16551/// Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16552///
16553/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_epi32&expand=3074)
16554#[inline]
16555#[target_feature(enable = "avx512f")]
16556#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16557#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
16558#[rustc_legacy_const_generics(2)]
16559pub unsafe fn _mm512_i64gather_epi32<const SCALE: i32>(
16560 offsets: __m512i,
16561 slice: *const u8,
16562) -> __m256i {
16563 static_assert_imm8_scale!(SCALE);
16564 let zeros: i32x8 = _mm256_setzero_si256().as_i32x8();
16565 let neg_one: i8 = -1;
16566 let slice: *const i8 = slice as *const i8;
16567 let offsets: i64x8 = offsets.as_i64x8();
16568 let r: i32x8 = vpgatherqd(src:zeros, slice, offsets, mask:neg_one, SCALE);
16569 transmute(src:r)
16570}
16571
16572/// Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16573///
16574/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_epi32&expand=3075)
16575#[inline]
16576#[target_feature(enable = "avx512f")]
16577#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16578#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
16579#[rustc_legacy_const_generics(4)]
16580pub unsafe fn _mm512_mask_i64gather_epi32<const SCALE: i32>(
16581 src: __m256i,
16582 mask: __mmask8,
16583 offsets: __m512i,
16584 slice: *const u8,
16585) -> __m256i {
16586 static_assert_imm8_scale!(SCALE);
16587 let src: i32x8 = src.as_i32x8();
16588 let mask: i8 = mask as i8;
16589 let slice: *const i8 = slice as *const i8;
16590 let offsets: i64x8 = offsets.as_i64x8();
16591 let r: i32x8 = vpgatherqd(src, slice, offsets, mask, SCALE);
    transmute(r)
16593}
16594
16595/// Scatter double-precision (64-bit) floating-point elements from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
16596///
16597/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_pd&expand=3044)
16598#[inline]
16599#[target_feature(enable = "avx512f")]
16600#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16601#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
16602#[rustc_legacy_const_generics(3)]
16603pub unsafe fn _mm512_i32scatter_pd<const SCALE: i32>(
16604 slice: *mut u8,
16605 offsets: __m256i,
16606 src: __m512d,
16607) {
16608 static_assert_imm8_scale!(SCALE);
16609 let src: f64x8 = src.as_f64x8();
16610 let neg_one: i8 = -1;
16611 let slice: *mut i8 = slice as *mut i8;
16612 let offsets: i32x8 = offsets.as_i32x8();
    vscatterdpd(slice, neg_one, offsets, src, SCALE);
16614}
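
// A minimal usage sketch (illustrative only; this helper is not part of the original
// source): scatter eight f64 values using 32-bit element indices. SCALE = 8 scales each
// index by the size of an f64, so lane i is written at base_addr + index[i] * 8.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn i32scatter_pd_usage_sketch() {
    let mut dst = [0.0f64; 8];
    // Reversed indices store lane 0 into dst[7], lane 1 into dst[6], and so on.
    let indices = _mm256_setr_epi32(7, 6, 5, 4, 3, 2, 1, 0);
    let values = _mm512_set1_pd(1.5);
    _mm512_i32scatter_pd::<8>(dst.as_mut_ptr() as *mut u8, indices, values);
}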
16615
16616/// Scatter double-precision (64-bit) floating-point elements from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16617///
16618/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_pd&expand=3045)
16619#[inline]
16620#[target_feature(enable = "avx512f")]
16621#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16622#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
16623#[rustc_legacy_const_generics(4)]
16624pub unsafe fn _mm512_mask_i32scatter_pd<const SCALE: i32>(
16625 slice: *mut u8,
16626 mask: __mmask8,
16627 offsets: __m256i,
16628 src: __m512d,
16629) {
16630 static_assert_imm8_scale!(SCALE);
16631 let src: f64x8 = src.as_f64x8();
16632 let slice: *mut i8 = slice as *mut i8;
16633 let offsets: i32x8 = offsets.as_i32x8();
16634 vscatterdpd(slice, mask as i8, offsets, src, SCALE);
16635}
16636
16637/// Scatter double-precision (64-bit) floating-point elements from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
16638///
16639/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_pd&expand=3122)
16640#[inline]
16641#[target_feature(enable = "avx512f")]
16642#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16643#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
16644#[rustc_legacy_const_generics(3)]
16645pub unsafe fn _mm512_i64scatter_pd<const SCALE: i32>(
16646 slice: *mut u8,
16647 offsets: __m512i,
16648 src: __m512d,
16649) {
16650 static_assert_imm8_scale!(SCALE);
16651 let src: f64x8 = src.as_f64x8();
16652 let neg_one: i8 = -1;
16653 let slice: *mut i8 = slice as *mut i8;
16654 let offsets: i64x8 = offsets.as_i64x8();
    vscatterqpd(slice, neg_one, offsets, src, SCALE);
16656}
16657
16658/// Scatter double-precision (64-bit) floating-point elements from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16659///
16660/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_pd&expand=3123)
16661#[inline]
16662#[target_feature(enable = "avx512f")]
16663#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16664#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
16665#[rustc_legacy_const_generics(4)]
16666pub unsafe fn _mm512_mask_i64scatter_pd<const SCALE: i32>(
16667 slice: *mut u8,
16668 mask: __mmask8,
16669 offsets: __m512i,
16670 src: __m512d,
16671) {
16672 static_assert_imm8_scale!(SCALE);
16673 let src: f64x8 = src.as_f64x8();
16674 let slice: *mut i8 = slice as *mut i8;
16675 let offsets: i64x8 = offsets.as_i64x8();
16676 vscatterqpd(slice, mask as i8, offsets, src, SCALE);
16677}
16678
16679/// Scatter single-precision (32-bit) floating-point elements from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
16680///
16681/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_ps&expand=3050)
16682#[inline]
16683#[target_feature(enable = "avx512f")]
16684#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16685#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
16686#[rustc_legacy_const_generics(3)]
16687pub unsafe fn _mm512_i32scatter_ps<const SCALE: i32>(
16688 slice: *mut u8,
16689 offsets: __m512i,
16690 src: __m512,
16691) {
16692 static_assert_imm8_scale!(SCALE);
16693 let src: f32x16 = src.as_f32x16();
16694 let neg_one: i16 = -1;
16695 let slice: *mut i8 = slice as *mut i8;
16696 let offsets: i32x16 = offsets.as_i32x16();
    vscatterdps(slice, neg_one, offsets, src, SCALE);
16698}
16699
16700/// Scatter single-precision (32-bit) floating-point elements from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16701///
16702/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_ps&expand=3051)
16703#[inline]
16704#[target_feature(enable = "avx512f")]
16705#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16706#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
16707#[rustc_legacy_const_generics(4)]
16708pub unsafe fn _mm512_mask_i32scatter_ps<const SCALE: i32>(
16709 slice: *mut u8,
16710 mask: __mmask16,
16711 offsets: __m512i,
16712 src: __m512,
16713) {
16714 static_assert_imm8_scale!(SCALE);
16715 let src: f32x16 = src.as_f32x16();
16716 let slice: *mut i8 = slice as *mut i8;
16717 let offsets: i32x16 = offsets.as_i32x16();
16718 vscatterdps(slice, mask as i16, offsets, src, SCALE);
16719}
16720
/// Scatter single-precision (32-bit) floating-point elements from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
16722///
16723/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_ps&expand=3128)
16724#[inline]
16725#[target_feature(enable = "avx512f")]
16726#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16727#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
16728#[rustc_legacy_const_generics(3)]
16729pub unsafe fn _mm512_i64scatter_ps<const SCALE: i32>(
16730 slice: *mut u8,
16731 offsets: __m512i,
16732 src: __m256,
16733) {
16734 static_assert_imm8_scale!(SCALE);
16735 let src: f32x8 = src.as_f32x8();
16736 let neg_one: i8 = -1;
16737 let slice: *mut i8 = slice as *mut i8;
16738 let offsets: i64x8 = offsets.as_i64x8();
    vscatterqps(slice, neg_one, offsets, src, SCALE);
16740}
16741
16742/// Scatter single-precision (32-bit) floating-point elements from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16743///
16744/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_ps&expand=3129)
16745#[inline]
16746#[target_feature(enable = "avx512f")]
16747#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16748#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
16749#[rustc_legacy_const_generics(4)]
16750pub unsafe fn _mm512_mask_i64scatter_ps<const SCALE: i32>(
16751 slice: *mut u8,
16752 mask: __mmask8,
16753 offsets: __m512i,
16754 src: __m256,
16755) {
16756 static_assert_imm8_scale!(SCALE);
16757 let src: f32x8 = src.as_f32x8();
16758 let slice: *mut i8 = slice as *mut i8;
16759 let offsets: i64x8 = offsets.as_i64x8();
16760 vscatterqps(slice, mask as i8, offsets, src, SCALE);
16761}
16762
16763/// Scatter 64-bit integers from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
16764///
16765/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_epi64&expand=3038)
16766#[inline]
16767#[target_feature(enable = "avx512f")]
16768#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16769#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
16770#[rustc_legacy_const_generics(3)]
16771pub unsafe fn _mm512_i32scatter_epi64<const SCALE: i32>(
16772 slice: *mut u8,
16773 offsets: __m256i,
16774 src: __m512i,
16775) {
16776 static_assert_imm8_scale!(SCALE);
16777 let src: i64x8 = src.as_i64x8();
16778 let neg_one: i8 = -1;
16779 let slice: *mut i8 = slice as *mut i8;
16780 let offsets: i32x8 = offsets.as_i32x8();
    vpscatterdq(slice, neg_one, offsets, src, SCALE);
16782}
16783
16784/// Scatter 64-bit integers from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16785///
16786/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_epi64&expand=3039)
16787#[inline]
16788#[target_feature(enable = "avx512f")]
16789#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16790#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
16791#[rustc_legacy_const_generics(4)]
16792pub unsafe fn _mm512_mask_i32scatter_epi64<const SCALE: i32>(
16793 slice: *mut u8,
16794 mask: __mmask8,
16795 offsets: __m256i,
16796 src: __m512i,
16797) {
16798 static_assert_imm8_scale!(SCALE);
16799 let src: i64x8 = src.as_i64x8();
16800 let mask: i8 = mask as i8;
16801 let slice: *mut i8 = slice as *mut i8;
16802 let offsets: i32x8 = offsets.as_i32x8();
16803 vpscatterdq(slice, mask, offsets, src, SCALE);
16804}
16805
16806/// Scatter 64-bit integers from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
16807///
16808/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i32scatter_epi64&expand=4099)
16809#[inline]
16810#[target_feature(enable = "avx512f,avx512vl")]
16811#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16812#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
16813#[rustc_legacy_const_generics(3)]
16814pub unsafe fn _mm256_i32scatter_epi64<const SCALE: i32>(
16815 slice: *mut u8,
16816 offsets: __m128i,
16817 src: __m256i,
16818) {
16819 static_assert_imm8_scale!(SCALE);
16820 let src: i64x4 = src.as_i64x4();
16821 let neg_one: i8 = -1;
16822 let slice: *mut i8 = slice as *mut i8;
16823 let offsets: i32x4 = offsets.as_i32x4();
    vpscatterdq256(slice, neg_one, offsets, src, SCALE);
16825}
16826
16827/// Scatter 64-bit integers from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
16828///
16829/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_epi64&expand=3116)
16830#[inline]
16831#[target_feature(enable = "avx512f")]
16832#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16833#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
16834#[rustc_legacy_const_generics(3)]
16835pub unsafe fn _mm512_i64scatter_epi64<const SCALE: i32>(
16836 slice: *mut u8,
16837 offsets: __m512i,
16838 src: __m512i,
16839) {
16840 static_assert_imm8_scale!(SCALE);
16841 let src: i64x8 = src.as_i64x8();
16842 let neg_one: i8 = -1;
16843 let slice: *mut i8 = slice as *mut i8;
16844 let offsets: i64x8 = offsets.as_i64x8();
    vpscatterqq(slice, neg_one, offsets, src, SCALE);
16846}
16847
16848/// Scatter 64-bit integers from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16849///
16850/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_epi64&expand=3117)
16851#[inline]
16852#[target_feature(enable = "avx512f")]
16853#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16854#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
16855#[rustc_legacy_const_generics(4)]
16856pub unsafe fn _mm512_mask_i64scatter_epi64<const SCALE: i32>(
16857 slice: *mut u8,
16858 mask: __mmask8,
16859 offsets: __m512i,
16860 src: __m512i,
16861) {
16862 static_assert_imm8_scale!(SCALE);
16863 let src: i64x8 = src.as_i64x8();
16864 let mask: i8 = mask as i8;
16865 let slice: *mut i8 = slice as *mut i8;
16866 let offsets: i64x8 = offsets.as_i64x8();
16867 vpscatterqq(slice, mask, offsets, src, SCALE);
16868}
16869
16870/// Scatter 32-bit integers from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
16871///
16872/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_epi32&expand=3032)
16873#[inline]
16874#[target_feature(enable = "avx512f")]
16875#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16876#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
16877#[rustc_legacy_const_generics(3)]
16878pub unsafe fn _mm512_i32scatter_epi32<const SCALE: i32>(
16879 slice: *mut u8,
16880 offsets: __m512i,
16881 src: __m512i,
16882) {
16883 static_assert_imm8_scale!(SCALE);
16884 let src: i32x16 = src.as_i32x16();
16885 let neg_one: i16 = -1;
16886 let slice: *mut i8 = slice as *mut i8;
16887 let offsets: i32x16 = offsets.as_i32x16();
    vpscatterdd(slice, neg_one, offsets, src, SCALE);
16889}
16890
16891/// Scatter 32-bit integers from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16892///
16893/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_epi32&expand=3033)
16894#[inline]
16895#[target_feature(enable = "avx512f")]
16896#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16897#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
16898#[rustc_legacy_const_generics(4)]
16899pub unsafe fn _mm512_mask_i32scatter_epi32<const SCALE: i32>(
16900 slice: *mut u8,
16901 mask: __mmask16,
16902 offsets: __m512i,
16903 src: __m512i,
16904) {
16905 static_assert_imm8_scale!(SCALE);
16906 let src: i32x16 = src.as_i32x16();
16907 let mask: i16 = mask as i16;
16908 let slice: *mut i8 = slice as *mut i8;
16909 let offsets: i32x16 = offsets.as_i32x16();
16910 vpscatterdd(slice, mask, offsets, src, SCALE);
16911}
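
// A minimal usage sketch (illustrative only; the helper below is not from the original
// source): masked scatter of 32-bit integers. With SCALE = 4 each 32-bit index selects an
// i32 slot; lanes whose mask bit is clear are not written to memory at all.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn mask_i32scatter_epi32_usage_sketch() {
    let mut dst = [0i32; 16];
    let indices = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    let values = _mm512_set1_epi32(42);
    // Only the low four lanes are stored; dst[4..] is left untouched.
    let k: __mmask16 = 0b0000_0000_0000_1111;
    _mm512_mask_i32scatter_epi32::<4>(dst.as_mut_ptr() as *mut u8, k, indices, values);
}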
16912
16913/// Scatter 32-bit integers from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
16914///
16915/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_epi32&expand=3108)
16916#[inline]
16917#[target_feature(enable = "avx512f")]
16918#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16919#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
16920#[rustc_legacy_const_generics(3)]
16921pub unsafe fn _mm512_i64scatter_epi32<const SCALE: i32>(
16922 slice: *mut u8,
16923 offsets: __m512i,
16924 src: __m256i,
16925) {
16926 static_assert_imm8_scale!(SCALE);
16927 let src: i32x8 = src.as_i32x8();
16928 let neg_one: i8 = -1;
16929 let slice: *mut i8 = slice as *mut i8;
16930 let offsets: i64x8 = offsets.as_i64x8();
    vpscatterqd(slice, neg_one, offsets, src, SCALE);
16932}
16933
16934/// Scatter 32-bit integers from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16935///
16936/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_epi32&expand=3109)
16937#[inline]
16938#[target_feature(enable = "avx512f")]
16939#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16940#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
16941#[rustc_legacy_const_generics(4)]
16942pub unsafe fn _mm512_mask_i64scatter_epi32<const SCALE: i32>(
16943 slice: *mut u8,
16944 mask: __mmask8,
16945 offsets: __m512i,
16946 src: __m256i,
16947) {
16948 static_assert_imm8_scale!(SCALE);
16949 let src: i32x8 = src.as_i32x8();
16950 let mask: i8 = mask as i8;
16951 let slice: *mut i8 = slice as *mut i8;
16952 let offsets: i64x8 = offsets.as_i64x8();
16953 vpscatterqd(slice, mask, offsets, src, SCALE);
16954}
16955
16956/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
16957///
16958/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_epi32&expand=1198)
16959#[inline]
16960#[target_feature(enable = "avx512f")]
16961#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16962#[cfg_attr(test, assert_instr(vpcompressd))]
16963pub unsafe fn _mm512_mask_compress_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
    transmute(vpcompressd(a.as_i32x16(), src.as_i32x16(), k))
16965}
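
// A minimal usage sketch (illustrative only; this helper is not part of the original
// source): the active lanes of `a` are packed contiguously into the low lanes of the
// result, and the remaining high lanes are taken from `src`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn mask_compress_epi32_usage_sketch() -> __m512i {
    let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    let src = _mm512_set1_epi32(-1);
    // The mask selects the even lanes, so the result starts 0, 2, 4, ... and its upper
    // eight lanes are copied from `src` (all -1 here).
    _mm512_mask_compress_epi32(src, 0b0101_0101_0101_0101, a)
}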
16966
16967/// Contiguously store the active 32-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
16968///
16969/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_epi32&expand=1199)
16970#[inline]
16971#[target_feature(enable = "avx512f")]
16972#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16973#[cfg_attr(test, assert_instr(vpcompressd))]
16974pub unsafe fn _mm512_maskz_compress_epi32(k: __mmask16, a: __m512i) -> __m512i {
    transmute(vpcompressd(
        a.as_i32x16(),
        _mm512_setzero_si512().as_i32x16(),
        k,
    ))
16980}
16981
16982/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
16983///
16984/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_epi32&expand=1196)
16985#[inline]
16986#[target_feature(enable = "avx512f,avx512vl")]
16987#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16988#[cfg_attr(test, assert_instr(vpcompressd))]
16989pub unsafe fn _mm256_mask_compress_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    transmute(vpcompressd256(a.as_i32x8(), src.as_i32x8(), k))
16991}
16992
16993/// Contiguously store the active 32-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
16994///
16995/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_epi32&expand=1197)
16996#[inline]
16997#[target_feature(enable = "avx512f,avx512vl")]
16998#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16999#[cfg_attr(test, assert_instr(vpcompressd))]
17000pub unsafe fn _mm256_maskz_compress_epi32(k: __mmask8, a: __m256i) -> __m256i {
    transmute(vpcompressd256(
        a.as_i32x8(),
        _mm256_setzero_si256().as_i32x8(),
        k,
    ))
17006}
17007
17008/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
17009///
17010/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_epi32&expand=1194)
17011#[inline]
17012#[target_feature(enable = "avx512f,avx512vl")]
17013#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17014#[cfg_attr(test, assert_instr(vpcompressd))]
17015pub unsafe fn _mm_mask_compress_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    transmute(vpcompressd128(a.as_i32x4(), src.as_i32x4(), k))
17017}
17018
17019/// Contiguously store the active 32-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
17020///
17021/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_epi32&expand=1195)
17022#[inline]
17023#[target_feature(enable = "avx512f,avx512vl")]
17024#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17025#[cfg_attr(test, assert_instr(vpcompressd))]
17026pub unsafe fn _mm_maskz_compress_epi32(k: __mmask8, a: __m128i) -> __m128i {
    transmute(vpcompressd128(
        a.as_i32x4(),
        _mm_setzero_si128().as_i32x4(),
        k,
    ))
17032}
17033
17034/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
17035///
17036/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_epi64&expand=1204)
17037#[inline]
17038#[target_feature(enable = "avx512f")]
17039#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17040#[cfg_attr(test, assert_instr(vpcompressq))]
17041pub unsafe fn _mm512_mask_compress_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
    transmute(vpcompressq(a.as_i64x8(), src.as_i64x8(), k))
17043}
17044
17045/// Contiguously store the active 64-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
17046///
17047/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_epi64&expand=1205)
17048#[inline]
17049#[target_feature(enable = "avx512f")]
17050#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17051#[cfg_attr(test, assert_instr(vpcompressq))]
17052pub unsafe fn _mm512_maskz_compress_epi64(k: __mmask8, a: __m512i) -> __m512i {
    transmute(vpcompressq(
        a.as_i64x8(),
        _mm512_setzero_si512().as_i64x8(),
        k,
    ))
17058}
17059
17060/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
17061///
17062/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_epi64&expand=1202)
17063#[inline]
17064#[target_feature(enable = "avx512f,avx512vl")]
17065#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17066#[cfg_attr(test, assert_instr(vpcompressq))]
17067pub unsafe fn _mm256_mask_compress_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    transmute(vpcompressq256(a.as_i64x4(), src.as_i64x4(), k))
17069}
17070
17071/// Contiguously store the active 64-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
17072///
17073/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_epi64&expand=1203)
17074#[inline]
17075#[target_feature(enable = "avx512f,avx512vl")]
17076#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17077#[cfg_attr(test, assert_instr(vpcompressq))]
17078pub unsafe fn _mm256_maskz_compress_epi64(k: __mmask8, a: __m256i) -> __m256i {
    transmute(vpcompressq256(
        a.as_i64x4(),
        _mm256_setzero_si256().as_i64x4(),
        k,
    ))
17084}
17085
17086/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
17087///
17088/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_epi64&expand=1200)
17089#[inline]
17090#[target_feature(enable = "avx512f,avx512vl")]
17091#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17092#[cfg_attr(test, assert_instr(vpcompressq))]
17093pub unsafe fn _mm_mask_compress_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    transmute(vpcompressq128(a.as_i64x2(), src.as_i64x2(), k))
17095}
17096
17097/// Contiguously store the active 64-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
17098///
17099/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_epi64&expand=1201)
17100#[inline]
17101#[target_feature(enable = "avx512f,avx512vl")]
17102#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17103#[cfg_attr(test, assert_instr(vpcompressq))]
17104pub unsafe fn _mm_maskz_compress_epi64(k: __mmask8, a: __m128i) -> __m128i {
    transmute(vpcompressq128(
        a.as_i64x2(),
        _mm_setzero_si128().as_i64x2(),
        k,
    ))
17110}
17111
17112/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
17113///
17114/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_ps&expand=1222)
17115#[inline]
17116#[target_feature(enable = "avx512f")]
17117#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17118#[cfg_attr(test, assert_instr(vcompressps))]
17119pub unsafe fn _mm512_mask_compress_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    transmute(vcompressps(a.as_f32x16(), src.as_f32x16(), k))
17121}
17122
17123/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
17124///
17125/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_ps&expand=1223)
17126#[inline]
17127#[target_feature(enable = "avx512f")]
17128#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17129#[cfg_attr(test, assert_instr(vcompressps))]
17130pub unsafe fn _mm512_maskz_compress_ps(k: __mmask16, a: __m512) -> __m512 {
    transmute(vcompressps(
        a.as_f32x16(),
        _mm512_setzero_ps().as_f32x16(),
        k,
    ))
17136}
17137
17138/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
17139///
17140/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_ps&expand=1220)
17141#[inline]
17142#[target_feature(enable = "avx512f,avx512vl")]
17143#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17144#[cfg_attr(test, assert_instr(vcompressps))]
17145pub unsafe fn _mm256_mask_compress_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
    transmute(vcompressps256(a.as_f32x8(), src.as_f32x8(), k))
17147}
17148
17149/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
17150///
17151/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_ps&expand=1221)
17152#[inline]
17153#[target_feature(enable = "avx512f,avx512vl")]
17154#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17155#[cfg_attr(test, assert_instr(vcompressps))]
17156pub unsafe fn _mm256_maskz_compress_ps(k: __mmask8, a: __m256) -> __m256 {
    transmute(vcompressps256(
        a.as_f32x8(),
        _mm256_setzero_ps().as_f32x8(),
        k,
    ))
17162}
17163
17164/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
17165///
17166/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_ps&expand=1218)
17167#[inline]
17168#[target_feature(enable = "avx512f,avx512vl")]
17169#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17170#[cfg_attr(test, assert_instr(vcompressps))]
17171pub unsafe fn _mm_mask_compress_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    transmute(vcompressps128(a.as_f32x4(), src.as_f32x4(), k))
17173}
17174
17175/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
17176///
17177/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_ps&expand=1219)
17178#[inline]
17179#[target_feature(enable = "avx512f,avx512vl")]
17180#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17181#[cfg_attr(test, assert_instr(vcompressps))]
17182pub unsafe fn _mm_maskz_compress_ps(k: __mmask8, a: __m128) -> __m128 {
    transmute(vcompressps128(a.as_f32x4(), _mm_setzero_ps().as_f32x4(), k))
17184}
17185
17186/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
17187///
17188/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_pd&expand=1216)
17189#[inline]
17190#[target_feature(enable = "avx512f")]
17191#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17192#[cfg_attr(test, assert_instr(vcompresspd))]
17193pub unsafe fn _mm512_mask_compress_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    transmute(vcompresspd(a.as_f64x8(), src.as_f64x8(), k))
17195}
17196
17197/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
17198///
17199/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_pd&expand=1217)
17200#[inline]
17201#[target_feature(enable = "avx512f")]
17202#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17203#[cfg_attr(test, assert_instr(vcompresspd))]
17204pub unsafe fn _mm512_maskz_compress_pd(k: __mmask8, a: __m512d) -> __m512d {
    transmute(vcompresspd(a.as_f64x8(), _mm512_setzero_pd().as_f64x8(), k))
17206}
17207
17208/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
17209///
17210/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_pd&expand=1214)
17211#[inline]
17212#[target_feature(enable = "avx512f,avx512vl")]
17213#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17214#[cfg_attr(test, assert_instr(vcompresspd))]
17215pub unsafe fn _mm256_mask_compress_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
    transmute(vcompresspd256(a.as_f64x4(), src.as_f64x4(), k))
17217}
17218
17219/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
17220///
17221/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_pd&expand=1215)
17222#[inline]
17223#[target_feature(enable = "avx512f,avx512vl")]
17224#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17225#[cfg_attr(test, assert_instr(vcompresspd))]
17226pub unsafe fn _mm256_maskz_compress_pd(k: __mmask8, a: __m256d) -> __m256d {
    transmute(vcompresspd256(
        a.as_f64x4(),
        _mm256_setzero_pd().as_f64x4(),
        k,
    ))
17232}
17233
17234/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
17235///
17236/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_pd&expand=1212)
17237#[inline]
17238#[target_feature(enable = "avx512f,avx512vl")]
17239#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17240#[cfg_attr(test, assert_instr(vcompresspd))]
17241pub unsafe fn _mm_mask_compress_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
    transmute(vcompresspd128(a.as_f64x2(), src.as_f64x2(), k))
17243}
17244
17245/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
17246///
17247/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_pd&expand=1213)
17248#[inline]
17249#[target_feature(enable = "avx512f,avx512vl")]
17250#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17251#[cfg_attr(test, assert_instr(vcompresspd))]
17252pub unsafe fn _mm_maskz_compress_pd(k: __mmask8, a: __m128d) -> __m128d {
    transmute(vcompresspd128(a.as_f64x2(), _mm_setzero_pd().as_f64x2(), k))
17254}
17255
17256/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
17257///
17258/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_epi32)
17259#[inline]
17260#[target_feature(enable = "avx512f")]
17261#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17262#[cfg_attr(test, assert_instr(vpcompressd))]
17263pub unsafe fn _mm512_mask_compressstoreu_epi32(base_addr: *mut u8, k: __mmask16, a: __m512i) {
    vcompressstored(base_addr as *mut _, a.as_i32x16(), k)
17265}
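
// A minimal usage sketch (illustrative only; the helper below is not from the original
// source): only the active lanes are written, contiguously, starting at `base_addr`;
// exactly k.count_ones() elements of the output buffer are touched.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn mask_compressstoreu_epi32_usage_sketch() {
    let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    let k: __mmask16 = 0b1010_1010_1010_1010;
    let mut out = [0i32; 16];
    // Writes the eight odd-indexed lanes of `a` into out[0..8]; out[8..] is untouched.
    _mm512_mask_compressstoreu_epi32(out.as_mut_ptr() as *mut u8, k, a);
}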
17266
17267/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
17268///
17269/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_epi32)
17270#[inline]
17271#[target_feature(enable = "avx512f,avx512vl")]
17272#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17273#[cfg_attr(test, assert_instr(vpcompressd))]
17274pub unsafe fn _mm256_mask_compressstoreu_epi32(base_addr: *mut u8, k: __mmask8, a: __m256i) {
    vcompressstored256(base_addr as *mut _, a.as_i32x8(), k)
17276}
17277
17278/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
17279///
17280/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_epi32)
17281#[inline]
17282#[target_feature(enable = "avx512f,avx512vl")]
17283#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17284#[cfg_attr(test, assert_instr(vpcompressd))]
17285pub unsafe fn _mm_mask_compressstoreu_epi32(base_addr: *mut u8, k: __mmask8, a: __m128i) {
    vcompressstored128(base_addr as *mut _, a.as_i32x4(), k)
17287}
17288
17289/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
17290///
17291/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_epi64)
17292#[inline]
17293#[target_feature(enable = "avx512f")]
17294#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17295#[cfg_attr(test, assert_instr(vpcompressq))]
17296pub unsafe fn _mm512_mask_compressstoreu_epi64(base_addr: *mut u8, k: __mmask8, a: __m512i) {
    vcompressstoreq(base_addr as *mut _, a.as_i64x8(), k)
17298}
17299
17300/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
17301///
17302/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_epi64)
17303#[inline]
17304#[target_feature(enable = "avx512f,avx512vl")]
17305#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17306#[cfg_attr(test, assert_instr(vpcompressq))]
17307pub unsafe fn _mm256_mask_compressstoreu_epi64(base_addr: *mut u8, k: __mmask8, a: __m256i) {
    vcompressstoreq256(base_addr as *mut _, a.as_i64x4(), k)
17309}
17310
17311/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
17312///
17313/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_epi64)
17314#[inline]
17315#[target_feature(enable = "avx512f,avx512vl")]
17316#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17317#[cfg_attr(test, assert_instr(vpcompressq))]
17318pub unsafe fn _mm_mask_compressstoreu_epi64(base_addr: *mut u8, k: __mmask8, a: __m128i) {
    vcompressstoreq128(base_addr as *mut _, a.as_i64x2(), k)
17320}
17321
17322/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
17323///
17324/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_ps)
17325#[inline]
17326#[target_feature(enable = "avx512f")]
17327#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17328#[cfg_attr(test, assert_instr(vcompressps))]
17329pub unsafe fn _mm512_mask_compressstoreu_ps(base_addr: *mut u8, k: __mmask16, a: __m512) {
    vcompressstoreps(base_addr as *mut _, a.as_f32x16(), k)
17331}
17332
17333/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
17334///
17335/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_ps)
17336#[inline]
17337#[target_feature(enable = "avx512f,avx512vl")]
17338#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17339#[cfg_attr(test, assert_instr(vcompressps))]
17340pub unsafe fn _mm256_mask_compressstoreu_ps(base_addr: *mut u8, k: __mmask8, a: __m256) {
    vcompressstoreps256(base_addr as *mut _, a.as_f32x8(), k)
17342}
17343
17344/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
17345///
17346/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_ps)
17347#[inline]
17348#[target_feature(enable = "avx512f,avx512vl")]
17349#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17350#[cfg_attr(test, assert_instr(vcompressps))]
17351pub unsafe fn _mm_mask_compressstoreu_ps(base_addr: *mut u8, k: __mmask8, a: __m128) {
    vcompressstoreps128(base_addr as *mut _, a.as_f32x4(), k)
17353}
17354
17355/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
17356///
17357/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_pd)
17358#[inline]
17359#[target_feature(enable = "avx512f")]
17360#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17361#[cfg_attr(test, assert_instr(vcompresspd))]
17362pub unsafe fn _mm512_mask_compressstoreu_pd(base_addr: *mut u8, k: __mmask8, a: __m512d) {
    vcompressstorepd(base_addr as *mut _, a.as_f64x8(), k)
17364}
17365
17366/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
17367///
17368/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_pd)
17369#[inline]
17370#[target_feature(enable = "avx512f,avx512vl")]
17371#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17372#[cfg_attr(test, assert_instr(vcompresspd))]
17373pub unsafe fn _mm256_mask_compressstoreu_pd(base_addr: *mut u8, k: __mmask8, a: __m256d) {
    vcompressstorepd256(base_addr as *mut _, a.as_f64x4(), k)
17375}
17376
17377/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
17378///
17379/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_pd)
17380#[inline]
17381#[target_feature(enable = "avx512f,avx512vl")]
17382#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17383#[cfg_attr(test, assert_instr(vcompresspd))]
17384pub unsafe fn _mm_mask_compressstoreu_pd(base_addr: *mut u8, k: __mmask8, a: __m128d) {
    vcompressstorepd128(base_addr as *mut _, a.as_f64x2(), k)
17386}
17387
17388/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
17389///
17390/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_epi32&expand=2316)
17391#[inline]
17392#[target_feature(enable = "avx512f")]
17393#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17394#[cfg_attr(test, assert_instr(vpexpandd))]
17395pub unsafe fn _mm512_mask_expand_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
    transmute(vpexpandd(a.as_i32x16(), src.as_i32x16(), k))
17397}
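
// A minimal usage sketch (illustrative only; this helper is not part of the original
// source): expand is the inverse of compress. Consecutive low lanes of `a` are spread
// out to the positions whose mask bit is set; the other lanes come from `src`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn mask_expand_epi32_usage_sketch() -> __m512i {
    let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    let src = _mm512_set1_epi32(-1);
    // Lanes 0..8 of `a` land in the even positions; the odd positions stay -1.
    _mm512_mask_expand_epi32(src, 0b0101_0101_0101_0101, a)
}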
17398
17399/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
17400///
17401/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_epi32&expand=2317)
17402#[inline]
17403#[target_feature(enable = "avx512f")]
17404#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17405#[cfg_attr(test, assert_instr(vpexpandd))]
17406pub unsafe fn _mm512_maskz_expand_epi32(k: __mmask16, a: __m512i) -> __m512i {
    transmute(vpexpandd(
        a.as_i32x16(),
        _mm512_setzero_si512().as_i32x16(),
        k,
    ))
17412}
17413
17414/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
17415///
17416/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_epi32&expand=2314)
17417#[inline]
17418#[target_feature(enable = "avx512f,avx512vl")]
17419#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17420#[cfg_attr(test, assert_instr(vpexpandd))]
17421pub unsafe fn _mm256_mask_expand_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    transmute(vpexpandd256(a.as_i32x8(), src.as_i32x8(), k))
17423}
17424
17425/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
17426///
17427/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_epi32&expand=2315)
17428#[inline]
17429#[target_feature(enable = "avx512f,avx512vl")]
17430#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17431#[cfg_attr(test, assert_instr(vpexpandd))]
17432pub unsafe fn _mm256_maskz_expand_epi32(k: __mmask8, a: __m256i) -> __m256i {
    transmute(vpexpandd256(
        a.as_i32x8(),
        _mm256_setzero_si256().as_i32x8(),
        k,
    ))
17438}
17439
17440/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
17441///
17442/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_epi32&expand=2312)
17443#[inline]
17444#[target_feature(enable = "avx512f,avx512vl")]
17445#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17446#[cfg_attr(test, assert_instr(vpexpandd))]
17447pub unsafe fn _mm_mask_expand_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    transmute(vpexpandd128(a.as_i32x4(), src.as_i32x4(), k))
17449}
17450
17451/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
17452///
17453/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_epi32&expand=2313)
17454#[inline]
17455#[target_feature(enable = "avx512f,avx512vl")]
17456#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17457#[cfg_attr(test, assert_instr(vpexpandd))]
17458pub unsafe fn _mm_maskz_expand_epi32(k: __mmask8, a: __m128i) -> __m128i {
    transmute(vpexpandd128(
        a.as_i32x4(),
        _mm_setzero_si128().as_i32x4(),
        k,
    ))
17464}
17465
17466/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
17467///
17468/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_epi64&expand=2322)
17469#[inline]
17470#[target_feature(enable = "avx512f")]
17471#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17472#[cfg_attr(test, assert_instr(vpexpandq))]
17473pub unsafe fn _mm512_mask_expand_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
    transmute(vpexpandq(a.as_i64x8(), src.as_i64x8(), k))
17475}
17476
17477/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
17478///
17479/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_epi64&expand=2323)
17480#[inline]
17481#[target_feature(enable = "avx512f")]
17482#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17483#[cfg_attr(test, assert_instr(vpexpandq))]
17484pub unsafe fn _mm512_maskz_expand_epi64(k: __mmask8, a: __m512i) -> __m512i {
    transmute(vpexpandq(
        a.as_i64x8(),
        _mm512_setzero_si512().as_i64x8(),
        k,
    ))
17490}
17491
17492/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
17493///
17494/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_epi64&expand=2320)
17495#[inline]
17496#[target_feature(enable = "avx512f,avx512vl")]
17497#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17498#[cfg_attr(test, assert_instr(vpexpandq))]
17499pub unsafe fn _mm256_mask_expand_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    transmute(vpexpandq256(a.as_i64x4(), src.as_i64x4(), k))
17501}
17502
17503/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
17504///
17505/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_epi64&expand=2321)
17506#[inline]
17507#[target_feature(enable = "avx512f,avx512vl")]
17508#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17509#[cfg_attr(test, assert_instr(vpexpandq))]
17510pub unsafe fn _mm256_maskz_expand_epi64(k: __mmask8, a: __m256i) -> __m256i {
    transmute(vpexpandq256(
        a.as_i64x4(),
        _mm256_setzero_si256().as_i64x4(),
        k,
    ))
17516}
17517
17518/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
17519///
17520/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_epi64&expand=2318)
17521#[inline]
17522#[target_feature(enable = "avx512f,avx512vl")]
17523#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17524#[cfg_attr(test, assert_instr(vpexpandq))]
17525pub unsafe fn _mm_mask_expand_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    transmute(vpexpandq128(a.as_i64x2(), src.as_i64x2(), k))
17527}
17528
17529/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
17530///
17531/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_epi64&expand=2319)
17532#[inline]
17533#[target_feature(enable = "avx512f,avx512vl")]
17534#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17535#[cfg_attr(test, assert_instr(vpexpandq))]
17536pub unsafe fn _mm_maskz_expand_epi64(k: __mmask8, a: __m128i) -> __m128i {
    transmute(vpexpandq128(
        a.as_i64x2(),
        _mm_setzero_si128().as_i64x2(),
        k,
    ))
17542}
17543
17544/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
17545///
17546/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_ps&expand=2340)
17547#[inline]
17548#[target_feature(enable = "avx512f")]
17549#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17550#[cfg_attr(test, assert_instr(vexpandps))]
17551pub unsafe fn _mm512_mask_expand_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    transmute(vexpandps(a.as_f32x16(), src.as_f32x16(), k))
17553}
17554
17555/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
17556///
17557/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_ps&expand=2341)
17558#[inline]
17559#[target_feature(enable = "avx512f")]
17560#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17561#[cfg_attr(test, assert_instr(vexpandps))]
17562pub unsafe fn _mm512_maskz_expand_ps(k: __mmask16, a: __m512) -> __m512 {
    transmute(vexpandps(a.as_f32x16(), _mm512_setzero_ps().as_f32x16(), k))
17564}
17565
17566/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
17567///
17568/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_ps&expand=2338)
17569#[inline]
17570#[target_feature(enable = "avx512f,avx512vl")]
17571#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17572#[cfg_attr(test, assert_instr(vexpandps))]
17573pub unsafe fn _mm256_mask_expand_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
    transmute(vexpandps256(a.as_f32x8(), src.as_f32x8(), k))
17575}
17576
17577/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
17578///
17579/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_ps&expand=2339)
17580#[inline]
17581#[target_feature(enable = "avx512f,avx512vl")]
17582#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17583#[cfg_attr(test, assert_instr(vexpandps))]
pub unsafe fn _mm256_maskz_expand_ps(k: __mmask8, a: __m256) -> __m256 {
    transmute(vexpandps256(a.as_f32x8(), _mm256_setzero_ps().as_f32x8(), k))
}
17591
17592/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
17593///
17594/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_ps&expand=2336)
17595#[inline]
17596#[target_feature(enable = "avx512f,avx512vl")]
17597#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17598#[cfg_attr(test, assert_instr(vexpandps))]
pub unsafe fn _mm_mask_expand_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    transmute(vexpandps128(a.as_f32x4(), src.as_f32x4(), k))
}
17602
17603/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
17604///
17605/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_ps&expand=2337)
17606#[inline]
17607#[target_feature(enable = "avx512f,avx512vl")]
17608#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17609#[cfg_attr(test, assert_instr(vexpandps))]
pub unsafe fn _mm_maskz_expand_ps(k: __mmask8, a: __m128) -> __m128 {
    transmute(vexpandps128(a.as_f32x4(), _mm_setzero_ps().as_f32x4(), k))
}
17613
17614/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
17615///
17616/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_pd&expand=2334)
17617#[inline]
17618#[target_feature(enable = "avx512f")]
17619#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17620#[cfg_attr(test, assert_instr(vexpandpd))]
pub unsafe fn _mm512_mask_expand_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    transmute(vexpandpd(a.as_f64x8(), src.as_f64x8(), k))
}
17624
17625/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
17626///
17627/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_pd&expand=2335)
17628#[inline]
17629#[target_feature(enable = "avx512f")]
17630#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17631#[cfg_attr(test, assert_instr(vexpandpd))]
pub unsafe fn _mm512_maskz_expand_pd(k: __mmask8, a: __m512d) -> __m512d {
    transmute(vexpandpd(a.as_f64x8(), _mm512_setzero_pd().as_f64x8(), k))
}
17635
17636/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
17637///
17638/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_pd&expand=2332)
17639#[inline]
17640#[target_feature(enable = "avx512f,avx512vl")]
17641#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17642#[cfg_attr(test, assert_instr(vexpandpd))]
pub unsafe fn _mm256_mask_expand_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
    transmute(vexpandpd256(a.as_f64x4(), src.as_f64x4(), k))
}
17646
17647/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
17648///
17649/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_pd&expand=2333)
17650#[inline]
17651#[target_feature(enable = "avx512f,avx512vl")]
17652#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17653#[cfg_attr(test, assert_instr(vexpandpd))]
pub unsafe fn _mm256_maskz_expand_pd(k: __mmask8, a: __m256d) -> __m256d {
    transmute(vexpandpd256(a.as_f64x4(), _mm256_setzero_pd().as_f64x4(), k))
}
17661
17662/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
17663///
17664/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_pd&expand=2330)
17665#[inline]
17666#[target_feature(enable = "avx512f,avx512vl")]
17667#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17668#[cfg_attr(test, assert_instr(vexpandpd))]
pub unsafe fn _mm_mask_expand_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
    transmute(vexpandpd128(a.as_f64x2(), src.as_f64x2(), k))
}
17672
17673/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
17674///
17675/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_pd&expand=2331)
17676#[inline]
17677#[target_feature(enable = "avx512f,avx512vl")]
17678#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17679#[cfg_attr(test, assert_instr(vexpandpd))]
pub unsafe fn _mm_maskz_expand_pd(k: __mmask8, a: __m128d) -> __m128d {
    transmute(vexpandpd128(a.as_f64x2(), _mm_setzero_pd().as_f64x2(), k))
}
17683
17684/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
17685///
17686/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rol_epi32&expand=4685)
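///
/// A minimal usage sketch (illustrative only; not part of Intel's documentation,
/// and it assumes the running CPU supports `avx512f`):
///
/// ```ignore
/// // Rotating 0x8000_0001 left by 1 wraps the top bit around to the bottom,
/// // giving 0x0000_0003 in every lane.
/// let a = _mm512_set1_epi32(0x8000_0001u32 as i32);
/// let r = _mm512_rol_epi32::<1>(a);
/// ```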
17687#[inline]
17688#[target_feature(enable = "avx512f")]
17689#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17690#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
17691#[rustc_legacy_const_generics(1)]
pub unsafe fn _mm512_rol_epi32<const IMM8: i32>(a: __m512i) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    let a: i32x16 = a.as_i32x16();
    let r: i32x16 = vprold(a, IMM8);
    transmute(r)
}
17698
17699/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
17700///
17701/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rol_epi32&expand=4683)
17702#[inline]
17703#[target_feature(enable = "avx512f")]
17704#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17705#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
17706#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_mask_rol_epi32<const IMM8: i32>(
    src: __m512i,
    k: __mmask16,
    a: __m512i,
) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    let a: i32x16 = a.as_i32x16();
    let r: i32x16 = vprold(a, IMM8);
    transmute(simd_select_bitmask(k, r, src.as_i32x16()))
}
17717
17718/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
17719///
17720/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rol_epi32&expand=4684)
17721#[inline]
17722#[target_feature(enable = "avx512f")]
17723#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17724#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
17725#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_maskz_rol_epi32<const IMM8: i32>(k: __mmask16, a: __m512i) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    let a: i32x16 = a.as_i32x16();
    let r: i32x16 = vprold(a, IMM8);
    let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, r, zero))
}
17733
17734/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
17735///
17736/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rol_epi32&expand=4682)
17737#[inline]
17738#[target_feature(enable = "avx512f,avx512vl")]
17739#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17740#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
17741#[rustc_legacy_const_generics(1)]
pub unsafe fn _mm256_rol_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    let a: i32x8 = a.as_i32x8();
    let r: i32x8 = vprold256(a, IMM8);
    transmute(r)
}
17748
17749/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
17750///
17751/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rol_epi32&expand=4680)
17752#[inline]
17753#[target_feature(enable = "avx512f,avx512vl")]
17754#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17755#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
17756#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm256_mask_rol_epi32<const IMM8: i32>(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    let a: i32x8 = a.as_i32x8();
    let r: i32x8 = vprold256(a, IMM8);
    transmute(simd_select_bitmask(k, r, src.as_i32x8()))
}
17767
17768/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
17769///
17770/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rol_epi32&expand=4681)
17771#[inline]
17772#[target_feature(enable = "avx512f,avx512vl")]
17773#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17774#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
17775#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm256_maskz_rol_epi32<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    let a: i32x8 = a.as_i32x8();
    let r: i32x8 = vprold256(a, IMM8);
    let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
    transmute(simd_select_bitmask(k, r, zero))
}
17783
17784/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
17785///
17786/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rol_epi32&expand=4679)
17787#[inline]
17788#[target_feature(enable = "avx512f,avx512vl")]
17789#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17790#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
17791#[rustc_legacy_const_generics(1)]
pub unsafe fn _mm_rol_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    let a: i32x4 = a.as_i32x4();
    let r: i32x4 = vprold128(a, IMM8);
    transmute(r)
}
17798
17799/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
17800///
17801/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rol_epi32&expand=4677)
17802#[inline]
17803#[target_feature(enable = "avx512f,avx512vl")]
17804#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17805#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
17806#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm_mask_rol_epi32<const IMM8: i32>(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    let a: i32x4 = a.as_i32x4();
    let r: i32x4 = vprold128(a, IMM8);
    transmute(simd_select_bitmask(k, r, src.as_i32x4()))
}
17817
17818/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
17819///
17820/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rol_epi32&expand=4678)
17821#[inline]
17822#[target_feature(enable = "avx512f,avx512vl")]
17823#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17824#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
17825#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm_maskz_rol_epi32<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    let a: i32x4 = a.as_i32x4();
    let r: i32x4 = vprold128(a, IMM8);
    let zero: i32x4 = _mm_setzero_si128().as_i32x4();
    transmute(simd_select_bitmask(k, r, zero))
}
17833
17834/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
17835///
17836/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ror_epi32&expand=4721)
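///
/// A minimal usage sketch (illustrative only; not part of Intel's documentation,
/// and it assumes the running CPU supports `avx512f`):
///
/// ```ignore
/// // Rotating 0x0000_0003 right by 1 moves the low bit to the top, giving
/// // 0x8000_0001 in every lane.
/// let a = _mm512_set1_epi32(3);
/// let r = _mm512_ror_epi32::<1>(a);
/// ```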
17837#[inline]
17838#[target_feature(enable = "avx512f")]
17839#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17840#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
17841#[rustc_legacy_const_generics(1)]
pub unsafe fn _mm512_ror_epi32<const IMM8: i32>(a: __m512i) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    let a: i32x16 = a.as_i32x16();
    let r: i32x16 = vprord(a, IMM8);
    transmute(r)
}
17848
17849/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
17850///
17851/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ror_epi32&expand=4719)
17852#[inline]
17853#[target_feature(enable = "avx512f")]
17854#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17855#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
17856#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_mask_ror_epi32<const IMM8: i32>(
    src: __m512i,
    k: __mmask16,
    a: __m512i,
) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    let a: i32x16 = a.as_i32x16();
    let r: i32x16 = vprord(a, IMM8);
    transmute(simd_select_bitmask(k, r, src.as_i32x16()))
}
17867
17868/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
17869///
17870/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ror_epi32&expand=4720)
17871#[inline]
17872#[target_feature(enable = "avx512f")]
17873#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17874#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
17875#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_maskz_ror_epi32<const IMM8: i32>(k: __mmask16, a: __m512i) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    let a: i32x16 = a.as_i32x16();
    let r: i32x16 = vprord(a, IMM8);
    let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, r, zero))
}
17883
17884/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
17885///
17886/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ror_epi32&expand=4718)
17887#[inline]
17888#[target_feature(enable = "avx512f,avx512vl")]
17889#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17890#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
17891#[rustc_legacy_const_generics(1)]
pub unsafe fn _mm256_ror_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    let a: i32x8 = a.as_i32x8();
    let r: i32x8 = vprord256(a, IMM8);
    transmute(r)
}
17898
17899/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
17900///
17901/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ror_epi32&expand=4716)
17902#[inline]
17903#[target_feature(enable = "avx512f,avx512vl")]
17904#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17905#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
17906#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm256_mask_ror_epi32<const IMM8: i32>(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    let a: i32x8 = a.as_i32x8();
    let r: i32x8 = vprord256(a, IMM8);
    transmute(simd_select_bitmask(k, r, src.as_i32x8()))
}
17917
17918/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
17919///
17920/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ror_epi32&expand=4717)
17921#[inline]
17922#[target_feature(enable = "avx512f,avx512vl")]
17923#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17924#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
17925#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm256_maskz_ror_epi32<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    let a: i32x8 = a.as_i32x8();
    let r: i32x8 = vprord256(a, IMM8);
    let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
    transmute(simd_select_bitmask(k, r, zero))
}
17933
17934/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
17935///
17936/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ror_epi32&expand=4715)
17937#[inline]
17938#[target_feature(enable = "avx512f,avx512vl")]
17939#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17940#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
17941#[rustc_legacy_const_generics(1)]
pub unsafe fn _mm_ror_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    let a: i32x4 = a.as_i32x4();
    let r: i32x4 = vprord128(a, IMM8);
    transmute(r)
}
17948
17949/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
17950///
17951/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ror_epi32&expand=4713)
17952#[inline]
17953#[target_feature(enable = "avx512f,avx512vl")]
17954#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17955#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
17956#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm_mask_ror_epi32<const IMM8: i32>(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    let a: i32x4 = a.as_i32x4();
    let r: i32x4 = vprord128(a, IMM8);
    transmute(simd_select_bitmask(k, r, src.as_i32x4()))
}
17967
17968/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
17969///
17970/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ror_epi32&expand=4714)
17971#[inline]
17972#[target_feature(enable = "avx512f,avx512vl")]
17973#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17974#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
17975#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm_maskz_ror_epi32<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    let a: i32x4 = a.as_i32x4();
    let r: i32x4 = vprord128(a, IMM8);
    let zero: i32x4 = _mm_setzero_si128().as_i32x4();
    transmute(simd_select_bitmask(k, r, zero))
}
17983
17984/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
17985///
17986/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rol_epi64&expand=4694)
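///
/// A minimal usage sketch (illustrative only; not part of Intel's documentation,
/// and it assumes the running CPU supports `avx512f`):
///
/// ```ignore
/// // 1 rotated left by 63 ends up in the sign bit of each 64-bit lane.
/// let a = _mm512_set1_epi64(1);
/// let r = _mm512_rol_epi64::<63>(a); // every lane == i64::MIN
/// ```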
17987#[inline]
17988#[target_feature(enable = "avx512f")]
17989#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17990#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
17991#[rustc_legacy_const_generics(1)]
pub unsafe fn _mm512_rol_epi64<const IMM8: i32>(a: __m512i) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    let a: i64x8 = a.as_i64x8();
    let r: i64x8 = vprolq(a, IMM8);
    transmute(r)
}
17998
17999/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18000///
18001/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rol_epi64&expand=4692)
18002#[inline]
18003#[target_feature(enable = "avx512f")]
18004#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18005#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
18006#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_mask_rol_epi64<const IMM8: i32>(
    src: __m512i,
    k: __mmask8,
    a: __m512i,
) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    let a: i64x8 = a.as_i64x8();
    let r: i64x8 = vprolq(a, IMM8);
    transmute(simd_select_bitmask(k, r, src.as_i64x8()))
}
18017
18018/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18019///
18020/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rol_epi64&expand=4693)
18021#[inline]
18022#[target_feature(enable = "avx512f")]
18023#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18024#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
18025#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_maskz_rol_epi64<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    let a: i64x8 = a.as_i64x8();
    let r: i64x8 = vprolq(a, IMM8);
    let zero: i64x8 = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, r, zero))
}
18033
18034/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
18035///
18036/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rol_epi64&expand=4691)
18037#[inline]
18038#[target_feature(enable = "avx512f,avx512vl")]
18039#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18040#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
18041#[rustc_legacy_const_generics(1)]
pub unsafe fn _mm256_rol_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    let a: i64x4 = a.as_i64x4();
    let r: i64x4 = vprolq256(a, IMM8);
    transmute(r)
}
18048
18049/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18050///
18051/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rol_epi64&expand=4689)
18052#[inline]
18053#[target_feature(enable = "avx512f,avx512vl")]
18054#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18055#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
18056#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm256_mask_rol_epi64<const IMM8: i32>(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    let a: i64x4 = a.as_i64x4();
    let r: i64x4 = vprolq256(a, IMM8);
    transmute(simd_select_bitmask(k, r, src.as_i64x4()))
}
18067
18068/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18069///
18070/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rol_epi64&expand=4690)
18071#[inline]
18072#[target_feature(enable = "avx512f,avx512vl")]
18073#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18074#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
18075#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm256_maskz_rol_epi64<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    let a: i64x4 = a.as_i64x4();
    let r: i64x4 = vprolq256(a, IMM8);
    let zero: i64x4 = _mm256_setzero_si256().as_i64x4();
    transmute(simd_select_bitmask(k, r, zero))
}
18083
18084/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
18085///
18086/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rol_epi64&expand=4688)
18087#[inline]
18088#[target_feature(enable = "avx512f,avx512vl")]
18089#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18090#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
18091#[rustc_legacy_const_generics(1)]
pub unsafe fn _mm_rol_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    let a: i64x2 = a.as_i64x2();
    let r: i64x2 = vprolq128(a, IMM8);
    transmute(r)
}
18098
18099/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18100///
18101/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rol_epi64&expand=4686)
18102#[inline]
18103#[target_feature(enable = "avx512f,avx512vl")]
18104#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18105#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
18106#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm_mask_rol_epi64<const IMM8: i32>(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    let a: i64x2 = a.as_i64x2();
    let r: i64x2 = vprolq128(a, IMM8);
    transmute(simd_select_bitmask(k, r, src.as_i64x2()))
}
18117
18118/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18119///
18120/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rol_epi64&expand=4687)
18121#[inline]
18122#[target_feature(enable = "avx512f,avx512vl")]
18123#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18124#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
18125#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm_maskz_rol_epi64<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    let a: i64x2 = a.as_i64x2();
    let r: i64x2 = vprolq128(a, IMM8);
    let zero: i64x2 = _mm_setzero_si128().as_i64x2();
    transmute(simd_select_bitmask(k, r, zero))
}
18133
18134/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
18135///
18136/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ror_epi64&expand=4730)
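///
/// A minimal usage sketch (illustrative only; not part of Intel's documentation,
/// and it assumes the running CPU supports `avx512f`):
///
/// ```ignore
/// // Rotating right by 63 is the same as rotating left by 1, so the sign bit
/// // of each 64-bit lane wraps back around to 1.
/// let a = _mm512_set1_epi64(i64::MIN);
/// let r = _mm512_ror_epi64::<63>(a); // every lane == 1
/// ```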
18137#[inline]
18138#[target_feature(enable = "avx512f")]
18139#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18140#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
18141#[rustc_legacy_const_generics(1)]
pub unsafe fn _mm512_ror_epi64<const IMM8: i32>(a: __m512i) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    let a: i64x8 = a.as_i64x8();
    let r: i64x8 = vprorq(a, IMM8);
    transmute(r)
}
18148
18149/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18150///
18151/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ror_epi64&expand=4728)
18152#[inline]
18153#[target_feature(enable = "avx512f")]
18154#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18155#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
18156#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_mask_ror_epi64<const IMM8: i32>(
    src: __m512i,
    k: __mmask8,
    a: __m512i,
) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    let a: i64x8 = a.as_i64x8();
    let r: i64x8 = vprorq(a, IMM8);
    transmute(simd_select_bitmask(k, r, src.as_i64x8()))
}
18167
18168/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18169///
18170/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ror_epi64&expand=4729)
18171#[inline]
18172#[target_feature(enable = "avx512f")]
18173#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18174#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
18175#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_maskz_ror_epi64<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    let a: i64x8 = a.as_i64x8();
    let r: i64x8 = vprorq(a, IMM8);
    let zero: i64x8 = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, r, zero))
}
18183
18184/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
18185///
18186/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ror_epi64&expand=4727)
18187#[inline]
18188#[target_feature(enable = "avx512f,avx512vl")]
18189#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18190#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
18191#[rustc_legacy_const_generics(1)]
pub unsafe fn _mm256_ror_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    let a: i64x4 = a.as_i64x4();
    let r: i64x4 = vprorq256(a, IMM8);
    transmute(r)
}
18198
18199/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18200///
18201/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ror_epi64&expand=4725)
18202#[inline]
18203#[target_feature(enable = "avx512f,avx512vl")]
18204#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18205#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
18206#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm256_mask_ror_epi64<const IMM8: i32>(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    let a: i64x4 = a.as_i64x4();
    let r: i64x4 = vprorq256(a, IMM8);
    transmute(simd_select_bitmask(k, r, src.as_i64x4()))
}
18217
18218/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18219///
18220/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ror_epi64&expand=4726)
18221#[inline]
18222#[target_feature(enable = "avx512f,avx512vl")]
18223#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18224#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
18225#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm256_maskz_ror_epi64<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    let a: i64x4 = a.as_i64x4();
    let r: i64x4 = vprorq256(a, IMM8);
    let zero: i64x4 = _mm256_setzero_si256().as_i64x4();
    transmute(simd_select_bitmask(k, r, zero))
}
18233
18234/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
18235///
18236/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ror_epi64&expand=4724)
18237#[inline]
18238#[target_feature(enable = "avx512f,avx512vl")]
18239#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18240#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
18241#[rustc_legacy_const_generics(1)]
pub unsafe fn _mm_ror_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    let a: i64x2 = a.as_i64x2();
    let r: i64x2 = vprorq128(a, IMM8);
    transmute(r)
}
18248
18249/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18250///
18251/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ror_epi64&expand=4722)
18252#[inline]
18253#[target_feature(enable = "avx512f,avx512vl")]
18254#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18255#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
18256#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm_mask_ror_epi64<const IMM8: i32>(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    let a: i64x2 = a.as_i64x2();
    let r: i64x2 = vprorq128(a, IMM8);
    transmute(simd_select_bitmask(k, r, src.as_i64x2()))
}
18267
18268/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18269///
18270/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ror_epi64&expand=4723)
18271#[inline]
18272#[target_feature(enable = "avx512f,avx512vl")]
18273#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18274#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
18275#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm_maskz_ror_epi64<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    let a: i64x2 = a.as_i64x2();
    let r: i64x2 = vprorq128(a, IMM8);
    let zero: i64x2 = _mm_setzero_si128().as_i64x2();
    transmute(simd_select_bitmask(k, r, zero))
}
18283
18284/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst.
18285///
18286/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_slli_epi32&expand=5310)
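///
/// A minimal usage sketch (illustrative only; not part of Intel's documentation,
/// and it assumes the running CPU supports `avx512f`):
///
/// ```ignore
/// let a = _mm512_set1_epi32(1);
/// let r = _mm512_slli_epi32::<5>(a);  // every lane == 32
/// let z = _mm512_slli_epi32::<32>(a); // a shift count of 32 or more gives 0
/// ```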
18287#[inline]
18288#[target_feature(enable = "avx512f")]
18289#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18290#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
18291#[rustc_legacy_const_generics(1)]
pub unsafe fn _mm512_slli_epi32<const IMM8: u32>(a: __m512i) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    if IMM8 >= 32 {
        _mm512_setzero_si512()
    } else {
        transmute(simd_shl(a.as_u32x16(), u32x16::splat(IMM8)))
    }
}
18300
18301/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18302///
18303/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_slli_epi32&expand=5308)
18304#[inline]
18305#[target_feature(enable = "avx512f")]
18306#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18307#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
18308#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_mask_slli_epi32<const IMM8: u32>(
    src: __m512i,
    k: __mmask16,
    a: __m512i,
) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    let shf: u32x16 = if IMM8 >= 32 {
        u32x16::splat(0)
    } else {
        simd_shl(a.as_u32x16(), u32x16::splat(IMM8))
    };
    transmute(simd_select_bitmask(k, shf, src.as_u32x16()))
}
18322
18323/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18324///
18325/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_slli_epi32&expand=5309)
18326#[inline]
18327#[target_feature(enable = "avx512f")]
18328#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18329#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
18330#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_maskz_slli_epi32<const IMM8: u32>(k: __mmask16, a: __m512i) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    if IMM8 >= 32 {
        _mm512_setzero_si512()
    } else {
        let shf: u32x16 = simd_shl(a.as_u32x16(), u32x16::splat(IMM8));
        let zero: u32x16 = u32x16::splat(0);
        transmute(simd_select_bitmask(k, shf, zero))
    }
}
18341
18342/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18343///
18344/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_slli_epi32&expand=5305)
18345#[inline]
18346#[target_feature(enable = "avx512f,avx512vl")]
18347#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18348#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
18349#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm256_mask_slli_epi32<const IMM8: u32>(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    let r: u32x8 = if IMM8 >= 32 {
        u32x8::splat(0)
    } else {
        simd_shl(a.as_u32x8(), u32x8::splat(IMM8))
    };
    transmute(simd_select_bitmask(k, r, src.as_u32x8()))
}
18363
18364/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18365///
18366/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_slli_epi32&expand=5306)
18367#[inline]
18368#[target_feature(enable = "avx512f,avx512vl")]
18369#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18370#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
18371#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm256_maskz_slli_epi32<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    if IMM8 >= 32 {
        _mm256_setzero_si256()
    } else {
        let r: u32x8 = simd_shl(a.as_u32x8(), u32x8::splat(IMM8));
        let zero: u32x8 = u32x8::splat(0);
        transmute(simd_select_bitmask(k, r, zero))
    }
}
18382
18383/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18384///
18385/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_slli_epi32&expand=5302)
18386#[inline]
18387#[target_feature(enable = "avx512f,avx512vl")]
18388#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18389#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
18390#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm_mask_slli_epi32<const IMM8: u32>(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    let r: u32x4 = if IMM8 >= 32 {
        u32x4::splat(0)
    } else {
        simd_shl(a.as_u32x4(), u32x4::splat(IMM8))
    };
    transmute(simd_select_bitmask(k, r, src.as_u32x4()))
}
18404
18405/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18406///
18407/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_slli_epi32&expand=5303)
18408#[inline]
18409#[target_feature(enable = "avx512f,avx512vl")]
18410#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18411#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
18412#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm_maskz_slli_epi32<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    if IMM8 >= 32 {
        _mm_setzero_si128()
    } else {
        let r: u32x4 = simd_shl(a.as_u32x4(), u32x4::splat(IMM8));
        let zero: u32x4 = u32x4::splat(0);
        transmute(simd_select_bitmask(k, r, zero))
    }
}
18423
18424/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst.
18425///
18426/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srli_epi32&expand=5522)
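///
/// A minimal usage sketch (illustrative only; not part of Intel's documentation,
/// and it assumes the running CPU supports `avx512f`):
///
/// ```ignore
/// // The shift is logical, so the sign bit is not replicated.
/// let a = _mm512_set1_epi32(-1);
/// let r = _mm512_srli_epi32::<31>(a); // every lane == 1
/// ```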
18427#[inline]
18428#[target_feature(enable = "avx512f")]
18429#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18430#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
18431#[rustc_legacy_const_generics(1)]
pub unsafe fn _mm512_srli_epi32<const IMM8: u32>(a: __m512i) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    if IMM8 >= 32 {
        _mm512_setzero_si512()
    } else {
        transmute(simd_shr(a.as_u32x16(), u32x16::splat(IMM8)))
    }
}
18440
18441/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18442///
18443/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srli_epi32&expand=5520)
18444#[inline]
18445#[target_feature(enable = "avx512f")]
18446#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18447#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
18448#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_mask_srli_epi32<const IMM8: u32>(
    src: __m512i,
    k: __mmask16,
    a: __m512i,
) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    let shf: u32x16 = if IMM8 >= 32 {
        u32x16::splat(0)
    } else {
        simd_shr(a.as_u32x16(), u32x16::splat(IMM8))
    };
    transmute(simd_select_bitmask(k, shf, src.as_u32x16()))
}
18462
18463/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18464///
18465/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srli_epi32&expand=5521)
18466#[inline]
18467#[target_feature(enable = "avx512f")]
18468#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18469#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
18470#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_maskz_srli_epi32<const IMM8: u32>(k: __mmask16, a: __m512i) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    if IMM8 >= 32 {
        _mm512_setzero_si512()
    } else {
        let shf: u32x16 = simd_shr(a.as_u32x16(), u32x16::splat(IMM8));
        let zero: u32x16 = u32x16::splat(0);
        transmute(simd_select_bitmask(k, shf, zero))
    }
}
18481
18482/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18483///
18484/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srli_epi32&expand=5517)
18485#[inline]
18486#[target_feature(enable = "avx512f,avx512vl")]
18487#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18488#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
18489#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm256_mask_srli_epi32<const IMM8: u32>(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    let r: u32x8 = if IMM8 >= 32 {
        u32x8::splat(0)
    } else {
        simd_shr(a.as_u32x8(), u32x8::splat(IMM8))
    };
    transmute(simd_select_bitmask(k, r, src.as_u32x8()))
}
18503
18504/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18505///
18506/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srli_epi32&expand=5518)
18507#[inline]
18508#[target_feature(enable = "avx512f,avx512vl")]
18509#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18510#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
18511#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm256_maskz_srli_epi32<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    if IMM8 >= 32 {
        _mm256_setzero_si256()
    } else {
        let r: u32x8 = simd_shr(a.as_u32x8(), u32x8::splat(IMM8));
        let zero: u32x8 = u32x8::splat(0);
        transmute(simd_select_bitmask(k, r, zero))
    }
}
18522
18523/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18524///
18525/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srli_epi32&expand=5514)
18526#[inline]
18527#[target_feature(enable = "avx512f,avx512vl")]
18528#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18529#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
18530#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm_mask_srli_epi32<const IMM8: u32>(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    let r: u32x4 = if IMM8 >= 32 {
        u32x4::splat(0)
    } else {
        simd_shr(a.as_u32x4(), u32x4::splat(IMM8))
    };
    transmute(simd_select_bitmask(k, r, src.as_u32x4()))
}
18544
18545/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18546///
18547/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srli_epi32&expand=5515)
18548#[inline]
18549#[target_feature(enable = "avx512f,avx512vl")]
18550#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18551#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
18552#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm_maskz_srli_epi32<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    if IMM8 >= 32 {
        _mm_setzero_si128()
    } else {
        let r: u32x4 = simd_shr(a.as_u32x4(), u32x4::splat(IMM8));
        let zero: u32x4 = u32x4::splat(0);
        transmute(simd_select_bitmask(k, r, zero))
    }
}
18563
18564/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst.
18565///
18566/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_slli_epi64&expand=5319)
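///
/// A minimal usage sketch (illustrative only; not part of Intel's documentation,
/// and it assumes the running CPU supports `avx512f`):
///
/// ```ignore
/// let a = _mm512_set1_epi64(1);
/// let r = _mm512_slli_epi64::<40>(a); // every lane == 1 << 40
/// let z = _mm512_slli_epi64::<64>(a); // a shift count of 64 or more gives 0
/// ```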
18567#[inline]
18568#[target_feature(enable = "avx512f")]
18569#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18570#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
18571#[rustc_legacy_const_generics(1)]
pub unsafe fn _mm512_slli_epi64<const IMM8: u32>(a: __m512i) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    if IMM8 >= 64 {
        _mm512_setzero_si512()
    } else {
        transmute(simd_shl(a.as_u64x8(), u64x8::splat(IMM8 as u64)))
    }
}
18580
18581/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18582///
18583/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_slli_epi64&expand=5317)
18584#[inline]
18585#[target_feature(enable = "avx512f")]
18586#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18587#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
18588#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_mask_slli_epi64<const IMM8: u32>(
    src: __m512i,
    k: __mmask8,
    a: __m512i,
) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    let shf: u64x8 = if IMM8 >= 64 {
        u64x8::splat(0)
    } else {
        simd_shl(a.as_u64x8(), u64x8::splat(IMM8 as u64))
    };
    transmute(simd_select_bitmask(k, shf, src.as_u64x8()))
}
18602
18603/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18604///
18605/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_slli_epi64&expand=5318)
18606#[inline]
18607#[target_feature(enable = "avx512f")]
18608#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18609#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
18610#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_maskz_slli_epi64<const IMM8: u32>(k: __mmask8, a: __m512i) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    if IMM8 >= 64 {
        _mm512_setzero_si512()
    } else {
        let shf: u64x8 = simd_shl(a.as_u64x8(), u64x8::splat(IMM8 as u64));
        let zero: u64x8 = u64x8::splat(0);
        transmute(simd_select_bitmask(k, shf, zero))
    }
}
18621
18622/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18623///
18624/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_slli_epi64&expand=5314)
18625#[inline]
18626#[target_feature(enable = "avx512f,avx512vl")]
18627#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18628#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
18629#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm256_mask_slli_epi64<const IMM8: u32>(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    let r: u64x4 = if IMM8 >= 64 {
        u64x4::splat(0)
    } else {
        simd_shl(a.as_u64x4(), u64x4::splat(IMM8 as u64))
    };
    transmute(simd_select_bitmask(k, r, src.as_u64x4()))
}
18643
18644/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18645///
18646/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_slli_epi64&expand=5315)
18647#[inline]
18648#[target_feature(enable = "avx512f,avx512vl")]
18649#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18650#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
18651#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm256_maskz_slli_epi64<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    if IMM8 >= 64 {
        _mm256_setzero_si256()
    } else {
        let r: u64x4 = simd_shl(a.as_u64x4(), u64x4::splat(IMM8 as u64));
        let zero: u64x4 = u64x4::splat(0);
        transmute(simd_select_bitmask(k, r, zero))
    }
}
18662
18663/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18664///
18665/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_slli_epi64&expand=5311)
18666#[inline]
18667#[target_feature(enable = "avx512f,avx512vl")]
18668#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18669#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
18670#[rustc_legacy_const_generics(3)]
18671pub unsafe fn _mm_mask_slli_epi64<const IMM8: u32>(
18672 src: __m128i,
18673 k: __mmask8,
18674 a: __m128i,
18675) -> __m128i {
18676 static_assert_uimm_bits!(IMM8, 8);
18677 let r: u64x2 = if IMM8 >= 64 {
18678 u64x2::splat(0)
18679 } else {
        simd_shl(a.as_u64x2(), u64x2::splat(IMM8 as u64))
    };
    transmute(simd_select_bitmask(k, r, src.as_u64x2()))
18683}
18684
18685/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18686///
18687/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_slli_epi64&expand=5312)
18688#[inline]
18689#[target_feature(enable = "avx512f,avx512vl")]
18690#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18691#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
18692#[rustc_legacy_const_generics(2)]
18693pub unsafe fn _mm_maskz_slli_epi64<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
18694 static_assert_uimm_bits!(IMM8, 8);
18695 if IMM8 >= 64 {
18696 _mm_setzero_si128()
18697 } else {
        let r: u64x2 = simd_shl(a.as_u64x2(), u64x2::splat(IMM8 as u64));
        let zero: u64x2 = u64x2::splat(0);
        transmute(simd_select_bitmask(k, r, zero))
18701 }
18702}
18703
18704/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst.
18705///
18706/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srli_epi64&expand=5531)
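///
/// A minimal usage sketch (illustrative only, not a doc-test):
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_epi64(-1);
///     // Logical shift: zeros are shifted in from the left, so the sign bit is not preserved.
///     let r = _mm512_srli_epi64::<60>(a);
///     // Every lane now holds 0xF (the upper 60 bits were filled with zeros).
/// }
/// ```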
18707#[inline]
18708#[target_feature(enable = "avx512f")]
18709#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18710#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
18711#[rustc_legacy_const_generics(1)]
18712pub unsafe fn _mm512_srli_epi64<const IMM8: u32>(a: __m512i) -> __m512i {
18713 static_assert_uimm_bits!(IMM8, 8);
18714 if IMM8 >= 64 {
18715 _mm512_setzero_si512()
18716 } else {
        transmute(simd_shr(a.as_u64x8(), u64x8::splat(IMM8 as u64)))
18718 }
18719}
18720
18721/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18722///
18723/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srli_epi64&expand=5529)
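///
/// A minimal usage sketch (illustrative only, not a doc-test) showing the writemask behaviour:
///
/// ```ignore
/// unsafe {
///     let src = _mm512_set1_epi64(-1);
///     let a = _mm512_set1_epi64(16);
///     let r = _mm512_mask_srli_epi64::<4>(src, 0b0000_0001, a);
///     // Lane 0 holds 16 >> 4 = 1; lanes 1..=7 are copied from `src` and stay -1.
/// }
/// ```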
18724#[inline]
18725#[target_feature(enable = "avx512f")]
18726#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18727#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
18728#[rustc_legacy_const_generics(3)]
18729pub unsafe fn _mm512_mask_srli_epi64<const IMM8: u32>(
18730 src: __m512i,
18731 k: __mmask8,
18732 a: __m512i,
18733) -> __m512i {
18734 static_assert_uimm_bits!(IMM8, 8);
18735 let shf: u64x8 = if IMM8 >= 64 {
18736 u64x8::splat(0)
18737 } else {
        simd_shr(a.as_u64x8(), u64x8::splat(IMM8 as u64))
    };
    transmute(simd_select_bitmask(k, shf, src.as_u64x8()))
18741}
18742
18743/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18744///
18745/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srli_epi64&expand=5530)
18746#[inline]
18747#[target_feature(enable = "avx512f")]
18748#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18749#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
18750#[rustc_legacy_const_generics(2)]
18751pub unsafe fn _mm512_maskz_srli_epi64<const IMM8: u32>(k: __mmask8, a: __m512i) -> __m512i {
18752 static_assert_uimm_bits!(IMM8, 8);
18753 if IMM8 >= 64 {
18754 _mm512_setzero_si512()
18755 } else {
        let shf: u64x8 = simd_shr(a.as_u64x8(), u64x8::splat(IMM8 as u64));
        let zero: u64x8 = u64x8::splat(0);
        transmute(simd_select_bitmask(k, shf, zero))
18759 }
18760}
18761
18762/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18763///
18764/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srli_epi64&expand=5526)
18765#[inline]
18766#[target_feature(enable = "avx512f,avx512vl")]
18767#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18768#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
18769#[rustc_legacy_const_generics(3)]
18770pub unsafe fn _mm256_mask_srli_epi64<const IMM8: u32>(
18771 src: __m256i,
18772 k: __mmask8,
18773 a: __m256i,
18774) -> __m256i {
18775 static_assert_uimm_bits!(IMM8, 8);
18776 let r: u64x4 = if IMM8 >= 64 {
18777 u64x4::splat(0)
18778 } else {
        simd_shr(a.as_u64x4(), u64x4::splat(IMM8 as u64))
    };
    transmute(simd_select_bitmask(k, r, src.as_u64x4()))
18782}
18783
18784/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18785///
18786/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srli_epi64&expand=5527)
18787#[inline]
18788#[target_feature(enable = "avx512f,avx512vl")]
18789#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18790#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
18791#[rustc_legacy_const_generics(2)]
18792pub unsafe fn _mm256_maskz_srli_epi64<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
18793 static_assert_uimm_bits!(IMM8, 8);
18794 if IMM8 >= 64 {
18795 _mm256_setzero_si256()
18796 } else {
        let r: u64x4 = simd_shr(a.as_u64x4(), u64x4::splat(IMM8 as u64));
        let zero: u64x4 = u64x4::splat(0);
        transmute(simd_select_bitmask(k, r, zero))
18800 }
18801}
18802
18803/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18804///
18805/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srli_epi64&expand=5523)
18806#[inline]
18807#[target_feature(enable = "avx512f,avx512vl")]
18808#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18809#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
18810#[rustc_legacy_const_generics(3)]
18811pub unsafe fn _mm_mask_srli_epi64<const IMM8: u32>(
18812 src: __m128i,
18813 k: __mmask8,
18814 a: __m128i,
18815) -> __m128i {
18816 static_assert_uimm_bits!(IMM8, 8);
18817 let r: u64x2 = if IMM8 >= 64 {
18818 u64x2::splat(0)
18819 } else {
        simd_shr(a.as_u64x2(), u64x2::splat(IMM8 as u64))
    };
    transmute(simd_select_bitmask(k, r, src.as_u64x2()))
18823}
18824
18825/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18826///
18827/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srli_epi64&expand=5524)
18828#[inline]
18829#[target_feature(enable = "avx512f,avx512vl")]
18830#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18831#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
18832#[rustc_legacy_const_generics(2)]
18833pub unsafe fn _mm_maskz_srli_epi64<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
18834 static_assert_uimm_bits!(IMM8, 8);
18835 if IMM8 >= 64 {
18836 _mm_setzero_si128()
18837 } else {
        let r: u64x2 = simd_shr(a.as_u64x2(), u64x2::splat(IMM8 as u64));
        let zero: u64x2 = u64x2::splat(0);
        transmute(simd_select_bitmask(k, r, zero))
18841 }
18842}
18843
18844/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst.
18845///
18846/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sll_epi32&expand=5280)
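///
/// A minimal usage sketch (illustrative only, not a doc-test). The shift count is taken from the low 64 bits of `count`:
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_epi32(1);
///     let count = _mm_set_epi64x(0, 3);
///     let r = _mm512_sll_epi32(a, count);
///     // Every 32-bit lane now holds 1 << 3 = 8.
/// }
/// ```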
18847#[inline]
18848#[target_feature(enable = "avx512f")]
18849#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18850#[cfg_attr(test, assert_instr(vpslld))]
18851pub unsafe fn _mm512_sll_epi32(a: __m512i, count: __m128i) -> __m512i {
    transmute(vpslld(a.as_i32x16(), count.as_i32x4()))
18853}
18854
18855/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18856///
18857/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sll_epi32&expand=5278)
18858#[inline]
18859#[target_feature(enable = "avx512f")]
18860#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18861#[cfg_attr(test, assert_instr(vpslld))]
18862pub unsafe fn _mm512_mask_sll_epi32(
18863 src: __m512i,
18864 k: __mmask16,
18865 a: __m512i,
18866 count: __m128i,
18867) -> __m512i {
18868 let shf: i32x16 = _mm512_sll_epi32(a, count).as_i32x16();
    transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
18870}
18871
18872/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18873///
18874/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sll_epi32&expand=5279)
18875#[inline]
18876#[target_feature(enable = "avx512f")]
18877#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18878#[cfg_attr(test, assert_instr(vpslld))]
18879pub unsafe fn _mm512_maskz_sll_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
18880 let shf: i32x16 = _mm512_sll_epi32(a, count).as_i32x16();
18881 let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, shf, zero))
18883}
18884
18885/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18886///
18887/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sll_epi32&expand=5275)
18888#[inline]
18889#[target_feature(enable = "avx512f,avx512vl")]
18890#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18891#[cfg_attr(test, assert_instr(vpslld))]
18892pub unsafe fn _mm256_mask_sll_epi32(
18893 src: __m256i,
18894 k: __mmask8,
18895 a: __m256i,
18896 count: __m128i,
18897) -> __m256i {
18898 let shf: i32x8 = _mm256_sll_epi32(a, count).as_i32x8();
    transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
18900}
18901
18902/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18903///
18904/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sll_epi32&expand=5276)
18905#[inline]
18906#[target_feature(enable = "avx512f,avx512vl")]
18907#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18908#[cfg_attr(test, assert_instr(vpslld))]
18909pub unsafe fn _mm256_maskz_sll_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
18910 let shf: i32x8 = _mm256_sll_epi32(a, count).as_i32x8();
18911 let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
    transmute(simd_select_bitmask(k, shf, zero))
18913}
18914
18915/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18916///
18917/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sll_epi32&expand=5272)
18918#[inline]
18919#[target_feature(enable = "avx512f,avx512vl")]
18920#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18921#[cfg_attr(test, assert_instr(vpslld))]
18922pub unsafe fn _mm_mask_sll_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
18923 let shf: i32x4 = _mm_sll_epi32(a, count).as_i32x4();
    transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
18925}
18926
18927/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18928///
18929/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sll_epi32&expand=5273)
18930#[inline]
18931#[target_feature(enable = "avx512f,avx512vl")]
18932#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18933#[cfg_attr(test, assert_instr(vpslld))]
18934pub unsafe fn _mm_maskz_sll_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
18935 let shf: i32x4 = _mm_sll_epi32(a, count).as_i32x4();
18936 let zero: i32x4 = _mm_setzero_si128().as_i32x4();
    transmute(simd_select_bitmask(k, shf, zero))
18938}
18939
18940/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst.
18941///
18942/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srl_epi32&expand=5492)
18943#[inline]
18944#[target_feature(enable = "avx512f")]
18945#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18946#[cfg_attr(test, assert_instr(vpsrld))]
18947pub unsafe fn _mm512_srl_epi32(a: __m512i, count: __m128i) -> __m512i {
    transmute(vpsrld(a.as_i32x16(), count.as_i32x4()))
18949}
18950
18951/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18952///
18953/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srl_epi32&expand=5490)
18954#[inline]
18955#[target_feature(enable = "avx512f")]
18956#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18957#[cfg_attr(test, assert_instr(vpsrld))]
18958pub unsafe fn _mm512_mask_srl_epi32(
18959 src: __m512i,
18960 k: __mmask16,
18961 a: __m512i,
18962 count: __m128i,
18963) -> __m512i {
18964 let shf: i32x16 = _mm512_srl_epi32(a, count).as_i32x16();
    transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
18966}
18967
18968/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18969///
18970/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srl_epi32&expand=5491)
18971#[inline]
18972#[target_feature(enable = "avx512f")]
18973#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18974#[cfg_attr(test, assert_instr(vpsrld))]
18975pub unsafe fn _mm512_maskz_srl_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
18976 let shf: i32x16 = _mm512_srl_epi32(a, count).as_i32x16();
18977 let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, shf, zero))
18979}
18980
18981/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18982///
18983/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srl_epi32&expand=5487)
18984#[inline]
18985#[target_feature(enable = "avx512f,avx512vl")]
18986#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18987#[cfg_attr(test, assert_instr(vpsrld))]
18988pub unsafe fn _mm256_mask_srl_epi32(
18989 src: __m256i,
18990 k: __mmask8,
18991 a: __m256i,
18992 count: __m128i,
18993) -> __m256i {
18994 let shf: i32x8 = _mm256_srl_epi32(a, count).as_i32x8();
    transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
18996}
18997
18998/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18999///
19000/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srl_epi32&expand=5488)
19001#[inline]
19002#[target_feature(enable = "avx512f,avx512vl")]
19003#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19004#[cfg_attr(test, assert_instr(vpsrld))]
19005pub unsafe fn _mm256_maskz_srl_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
19006 let shf: i32x8 = _mm256_srl_epi32(a, count).as_i32x8();
19007 let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
    transmute(simd_select_bitmask(k, shf, zero))
19009}
19010
19011/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19012///
19013/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srl_epi32&expand=5484)
19014#[inline]
19015#[target_feature(enable = "avx512f,avx512vl")]
19016#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19017#[cfg_attr(test, assert_instr(vpsrld))]
19018pub unsafe fn _mm_mask_srl_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
19019 let shf: i32x4 = _mm_srl_epi32(a, count).as_i32x4();
    transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
19021}
19022
19023/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19024///
19025/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srl_epi32&expand=5485)
19026#[inline]
19027#[target_feature(enable = "avx512f,avx512vl")]
19028#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19029#[cfg_attr(test, assert_instr(vpsrld))]
19030pub unsafe fn _mm_maskz_srl_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
19031 let shf: i32x4 = _mm_srl_epi32(a, count).as_i32x4();
19032 let zero: i32x4 = _mm_setzero_si128().as_i32x4();
    transmute(simd_select_bitmask(k, shf, zero))
19034}
19035
19036/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst.
19037///
19038/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sll_epi64&expand=5289)
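///
/// A minimal usage sketch (illustrative only, not a doc-test):
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_epi64(5);
///     let count = _mm_set_epi64x(0, 1);
///     let r = _mm512_sll_epi64(a, count);
///     // Every 64-bit lane now holds 5 << 1 = 10.
/// }
/// ```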
19039#[inline]
19040#[target_feature(enable = "avx512f")]
19041#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19042#[cfg_attr(test, assert_instr(vpsllq))]
19043pub unsafe fn _mm512_sll_epi64(a: __m512i, count: __m128i) -> __m512i {
    transmute(vpsllq(a.as_i64x8(), count.as_i64x2()))
19045}
19046
19047/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19048///
19049/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sll_epi64&expand=5287)
19050#[inline]
19051#[target_feature(enable = "avx512f")]
19052#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19053#[cfg_attr(test, assert_instr(vpsllq))]
19054pub unsafe fn _mm512_mask_sll_epi64(
19055 src: __m512i,
19056 k: __mmask8,
19057 a: __m512i,
19058 count: __m128i,
19059) -> __m512i {
19060 let shf: i64x8 = _mm512_sll_epi64(a, count).as_i64x8();
    transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
19062}
19063
19064/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19065///
19066/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sll_epi64&expand=5288)
19067#[inline]
19068#[target_feature(enable = "avx512f")]
19069#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19070#[cfg_attr(test, assert_instr(vpsllq))]
19071pub unsafe fn _mm512_maskz_sll_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
19072 let shf: i64x8 = _mm512_sll_epi64(a, count).as_i64x8();
19073 let zero: i64x8 = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, shf, zero))
19075}
19076
19077/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19078///
19079/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sll_epi64&expand=5284)
19080#[inline]
19081#[target_feature(enable = "avx512f,avx512vl")]
19082#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19083#[cfg_attr(test, assert_instr(vpsllq))]
19084pub unsafe fn _mm256_mask_sll_epi64(
19085 src: __m256i,
19086 k: __mmask8,
19087 a: __m256i,
19088 count: __m128i,
19089) -> __m256i {
19090 let shf: i64x4 = _mm256_sll_epi64(a, count).as_i64x4();
    transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
19092}
19093
19094/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19095///
19096/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sll_epi64&expand=5285)
19097#[inline]
19098#[target_feature(enable = "avx512f,avx512vl")]
19099#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19100#[cfg_attr(test, assert_instr(vpsllq))]
19101pub unsafe fn _mm256_maskz_sll_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
19102 let shf: i64x4 = _mm256_sll_epi64(a, count).as_i64x4();
19103 let zero: i64x4 = _mm256_setzero_si256().as_i64x4();
    transmute(simd_select_bitmask(k, shf, zero))
19105}
19106
19107/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19108///
19109/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sll_epi64&expand=5281)
19110#[inline]
19111#[target_feature(enable = "avx512f,avx512vl")]
19112#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19113#[cfg_attr(test, assert_instr(vpsllq))]
19114pub unsafe fn _mm_mask_sll_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
19115 let shf: i64x2 = _mm_sll_epi64(a, count).as_i64x2();
    transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
19117}
19118
19119/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19120///
19121/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sll_epi64&expand=5282)
19122#[inline]
19123#[target_feature(enable = "avx512f,avx512vl")]
19124#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19125#[cfg_attr(test, assert_instr(vpsllq))]
19126pub unsafe fn _mm_maskz_sll_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
19127 let shf: i64x2 = _mm_sll_epi64(a, count).as_i64x2();
19128 let zero: i64x2 = _mm_setzero_si128().as_i64x2();
    transmute(simd_select_bitmask(k, shf, zero))
19130}
19131
19132/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst.
19133///
19134/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srl_epi64&expand=5501)
19135#[inline]
19136#[target_feature(enable = "avx512f")]
19137#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19138#[cfg_attr(test, assert_instr(vpsrlq))]
19139pub unsafe fn _mm512_srl_epi64(a: __m512i, count: __m128i) -> __m512i {
    transmute(vpsrlq(a.as_i64x8(), count.as_i64x2()))
19141}
19142
19143/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19144///
19145/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srl_epi64&expand=5499)
19146#[inline]
19147#[target_feature(enable = "avx512f")]
19148#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19149#[cfg_attr(test, assert_instr(vpsrlq))]
19150pub unsafe fn _mm512_mask_srl_epi64(
19151 src: __m512i,
19152 k: __mmask8,
19153 a: __m512i,
19154 count: __m128i,
19155) -> __m512i {
19156 let shf: i64x8 = _mm512_srl_epi64(a, count).as_i64x8();
    transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
19158}
19159
19160/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19161///
19162/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srl_epi64&expand=5500)
19163#[inline]
19164#[target_feature(enable = "avx512f")]
19165#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19166#[cfg_attr(test, assert_instr(vpsrlq))]
19167pub unsafe fn _mm512_maskz_srl_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
19168 let shf: i64x8 = _mm512_srl_epi64(a, count).as_i64x8();
19169 let zero: i64x8 = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, shf, zero))
19171}
19172
19173/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19174///
19175/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srl_epi64&expand=5496)
19176#[inline]
19177#[target_feature(enable = "avx512f,avx512vl")]
19178#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19179#[cfg_attr(test, assert_instr(vpsrlq))]
19180pub unsafe fn _mm256_mask_srl_epi64(
19181 src: __m256i,
19182 k: __mmask8,
19183 a: __m256i,
19184 count: __m128i,
19185) -> __m256i {
19186 let shf: i64x4 = _mm256_srl_epi64(a, count).as_i64x4();
    transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
19188}
19189
19190/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19191///
19192/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srl_epi64&expand=5497)
19193#[inline]
19194#[target_feature(enable = "avx512f,avx512vl")]
19195#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19196#[cfg_attr(test, assert_instr(vpsrlq))]
19197pub unsafe fn _mm256_maskz_srl_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
19198 let shf: i64x4 = _mm256_srl_epi64(a, count).as_i64x4();
19199 let zero: i64x4 = _mm256_setzero_si256().as_i64x4();
    transmute(simd_select_bitmask(k, shf, zero))
19201}
19202
19203/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19204///
19205/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srl_epi64&expand=5493)
19206#[inline]
19207#[target_feature(enable = "avx512f,avx512vl")]
19208#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19209#[cfg_attr(test, assert_instr(vpsrlq))]
19210pub unsafe fn _mm_mask_srl_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
19211 let shf: i64x2 = _mm_srl_epi64(a, count).as_i64x2();
    transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
19213}
19214
19215/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19216///
19217/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srl_epi64&expand=5494)
19218#[inline]
19219#[target_feature(enable = "avx512f,avx512vl")]
19220#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19221#[cfg_attr(test, assert_instr(vpsrlq))]
19222pub unsafe fn _mm_maskz_srl_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
19223 let shf: i64x2 = _mm_srl_epi64(a, count).as_i64x2();
19224 let zero: i64x2 = _mm_setzero_si128().as_i64x2();
    transmute(simd_select_bitmask(k, shf, zero))
19226}
19227
19228/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst.
19229///
19230/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sra_epi32&expand=5407)
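///
/// A minimal usage sketch (illustrative only, not a doc-test). Arithmetic shifts replicate the sign bit:
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_epi32(-16);
///     let count = _mm_set_epi64x(0, 2);
///     let r = _mm512_sra_epi32(a, count);
///     // Every lane now holds -16 >> 2 = -4 (sign bits shifted in from the left).
/// }
/// ```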
19231#[inline]
19232#[target_feature(enable = "avx512f")]
19233#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19234#[cfg_attr(test, assert_instr(vpsrad))]
19235pub unsafe fn _mm512_sra_epi32(a: __m512i, count: __m128i) -> __m512i {
    transmute(vpsrad(a.as_i32x16(), count.as_i32x4()))
19237}
19238
19239/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19240///
19241/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sra_epi32&expand=5405)
19242#[inline]
19243#[target_feature(enable = "avx512f")]
19244#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19245#[cfg_attr(test, assert_instr(vpsrad))]
19246pub unsafe fn _mm512_mask_sra_epi32(
19247 src: __m512i,
19248 k: __mmask16,
19249 a: __m512i,
19250 count: __m128i,
19251) -> __m512i {
19252 let shf: i32x16 = _mm512_sra_epi32(a, count).as_i32x16();
    transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
19254}
19255
19256/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19257///
19258/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sra_epi32&expand=5406)
19259#[inline]
19260#[target_feature(enable = "avx512f")]
19261#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19262#[cfg_attr(test, assert_instr(vpsrad))]
19263pub unsafe fn _mm512_maskz_sra_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
19264 let shf: i32x16 = _mm512_sra_epi32(a, count).as_i32x16();
19265 let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, shf, zero))
19267}
19268
19269/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19270///
19271/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sra_epi32&expand=5402)
19272#[inline]
19273#[target_feature(enable = "avx512f,avx512vl")]
19274#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19275#[cfg_attr(test, assert_instr(vpsrad))]
19276pub unsafe fn _mm256_mask_sra_epi32(
19277 src: __m256i,
19278 k: __mmask8,
19279 a: __m256i,
19280 count: __m128i,
19281) -> __m256i {
19282 let shf: i32x8 = _mm256_sra_epi32(a, count).as_i32x8();
    transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
19284}
19285
19286/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19287///
19288/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sra_epi32&expand=5403)
19289#[inline]
19290#[target_feature(enable = "avx512f,avx512vl")]
19291#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19292#[cfg_attr(test, assert_instr(vpsrad))]
19293pub unsafe fn _mm256_maskz_sra_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
19294 let shf: i32x8 = _mm256_sra_epi32(a, count).as_i32x8();
19295 let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
    transmute(simd_select_bitmask(k, shf, zero))
19297}
19298
19299/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19300///
19301/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sra_epi32&expand=5399)
19302#[inline]
19303#[target_feature(enable = "avx512f,avx512vl")]
19304#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19305#[cfg_attr(test, assert_instr(vpsrad))]
19306pub unsafe fn _mm_mask_sra_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
19307 let shf: i32x4 = _mm_sra_epi32(a, count).as_i32x4();
    transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
19309}
19310
19311/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19312///
19313/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sra_epi32&expand=5400)
19314#[inline]
19315#[target_feature(enable = "avx512f,avx512vl")]
19316#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19317#[cfg_attr(test, assert_instr(vpsrad))]
19318pub unsafe fn _mm_maskz_sra_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
19319 let shf: i32x4 = _mm_sra_epi32(a, count).as_i32x4();
19320 let zero: i32x4 = _mm_setzero_si128().as_i32x4();
    transmute(simd_select_bitmask(k, shf, zero))
19322}
19323
19324/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst.
19325///
19326/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sra_epi64&expand=5416)
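///
/// A minimal usage sketch (illustrative only, not a doc-test). Unlike SSE/AVX, AVX-512 also provides an arithmetic right shift for 64-bit lanes:
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_epi64(-8);
///     let count = _mm_set_epi64x(0, 3);
///     let r = _mm512_sra_epi64(a, count);
///     // Every lane now holds -8 >> 3 = -1.
/// }
/// ```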
19327#[inline]
19328#[target_feature(enable = "avx512f")]
19329#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19330#[cfg_attr(test, assert_instr(vpsraq))]
19331pub unsafe fn _mm512_sra_epi64(a: __m512i, count: __m128i) -> __m512i {
    transmute(vpsraq(a.as_i64x8(), count.as_i64x2()))
19333}
19334
19335/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19336///
19337/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sra_epi64&expand=5414)
19338#[inline]
19339#[target_feature(enable = "avx512f")]
19340#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19341#[cfg_attr(test, assert_instr(vpsraq))]
19342pub unsafe fn _mm512_mask_sra_epi64(
19343 src: __m512i,
19344 k: __mmask8,
19345 a: __m512i,
19346 count: __m128i,
19347) -> __m512i {
19348 let shf: i64x8 = _mm512_sra_epi64(a, count).as_i64x8();
    transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
19350}
19351
19352/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19353///
19354/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sra_epi64&expand=5415)
19355#[inline]
19356#[target_feature(enable = "avx512f")]
19357#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19358#[cfg_attr(test, assert_instr(vpsraq))]
19359pub unsafe fn _mm512_maskz_sra_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
19360 let shf: i64x8 = _mm512_sra_epi64(a, count).as_i64x8();
19361 let zero: i64x8 = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, shf, zero))
19363}
19364
19365/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst.
19366///
19367/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sra_epi64&expand=5413)
19368#[inline]
19369#[target_feature(enable = "avx512f,avx512vl")]
19370#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19371#[cfg_attr(test, assert_instr(vpsraq))]
19372pub unsafe fn _mm256_sra_epi64(a: __m256i, count: __m128i) -> __m256i {
    transmute(vpsraq256(a.as_i64x4(), count.as_i64x2()))
19374}
19375
19376/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19377///
19378/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sra_epi64&expand=5411)
19379#[inline]
19380#[target_feature(enable = "avx512f,avx512vl")]
19381#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19382#[cfg_attr(test, assert_instr(vpsraq))]
19383pub unsafe fn _mm256_mask_sra_epi64(
19384 src: __m256i,
19385 k: __mmask8,
19386 a: __m256i,
19387 count: __m128i,
19388) -> __m256i {
19389 let shf: i64x4 = _mm256_sra_epi64(a, count).as_i64x4();
    transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
19391}
19392
19393/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19394///
19395/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sra_epi64&expand=5412)
19396#[inline]
19397#[target_feature(enable = "avx512f,avx512vl")]
19398#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19399#[cfg_attr(test, assert_instr(vpsraq))]
19400pub unsafe fn _mm256_maskz_sra_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
19401 let shf: i64x4 = _mm256_sra_epi64(a, count).as_i64x4();
19402 let zero: i64x4 = _mm256_setzero_si256().as_i64x4();
    transmute(simd_select_bitmask(k, shf, zero))
19404}
19405
19406/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst.
19407///
19408/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sra_epi64&expand=5410)
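///
/// A minimal usage sketch (illustrative only, not a doc-test; the 128-bit form additionally requires `avx512vl`):
///
/// ```ignore
/// unsafe {
///     let a = _mm_set_epi64x(-4, 64);
///     let count = _mm_set_epi64x(0, 2);
///     let r = _mm_sra_epi64(a, count);
///     // r holds [64 >> 2, -4 >> 2] = [16, -1].
/// }
/// ```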
19409#[inline]
19410#[target_feature(enable = "avx512f,avx512vl")]
19411#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19412#[cfg_attr(test, assert_instr(vpsraq))]
19413pub unsafe fn _mm_sra_epi64(a: __m128i, count: __m128i) -> __m128i {
    transmute(vpsraq128(a.as_i64x2(), count.as_i64x2()))
19415}
19416
19417/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19418///
19419/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sra_epi64&expand=5408)
19420#[inline]
19421#[target_feature(enable = "avx512f,avx512vl")]
19422#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19423#[cfg_attr(test, assert_instr(vpsraq))]
19424pub unsafe fn _mm_mask_sra_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
19425 let shf: i64x2 = _mm_sra_epi64(a, count).as_i64x2();
    transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
19427}
19428
19429/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19430///
19431/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sra_epi64&expand=5409)
19432#[inline]
19433#[target_feature(enable = "avx512f,avx512vl")]
19434#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19435#[cfg_attr(test, assert_instr(vpsraq))]
19436pub unsafe fn _mm_maskz_sra_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
19437 let shf: i64x2 = _mm_sra_epi64(a, count).as_i64x2();
19438 let zero: i64x2 = _mm_setzero_si128().as_i64x2();
    transmute(simd_select_bitmask(k, shf, zero))
19440}
19441
19442/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
19443///
19444/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srai_epi32&expand=5436)
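///
/// A minimal usage sketch (illustrative only, not a doc-test). Shift counts larger than 31 behave as a shift by 31, i.e. every lane collapses to its sign bits:
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_epi32(-64);
///     let r = _mm512_srai_epi32::<4>(a);
///     // Every lane now holds -64 >> 4 = -4.
/// }
/// ```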
19445#[inline]
19446#[target_feature(enable = "avx512f")]
19447#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19448#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
19449#[rustc_legacy_const_generics(1)]
19450pub unsafe fn _mm512_srai_epi32<const IMM8: u32>(a: __m512i) -> __m512i {
19451 static_assert_uimm_bits!(IMM8, 8);
    transmute(simd_shr(a.as_i32x16(), i32x16::splat(IMM8.min(31) as i32)))
19453}
19454
19455/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19456///
19457/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srai_epi32&expand=5434)
19458#[inline]
19459#[target_feature(enable = "avx512f")]
19460#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19461#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
19462#[rustc_legacy_const_generics(3)]
19463pub unsafe fn _mm512_mask_srai_epi32<const IMM8: u32>(
19464 src: __m512i,
19465 k: __mmask16,
19466 a: __m512i,
19467) -> __m512i {
19468 static_assert_uimm_bits!(IMM8, 8);
    let r: i32x16 = simd_shr(a.as_i32x16(), i32x16::splat(IMM8.min(31) as i32));
    transmute(simd_select_bitmask(k, r, src.as_i32x16()))
19471}
19472
19473/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19474///
19475/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srai_epi32&expand=5435)
19476#[inline]
19477#[target_feature(enable = "avx512f")]
19478#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19479#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
19480#[rustc_legacy_const_generics(2)]
19481pub unsafe fn _mm512_maskz_srai_epi32<const IMM8: u32>(k: __mmask16, a: __m512i) -> __m512i {
19482 static_assert_uimm_bits!(IMM8, 8);
    let r: i32x16 = simd_shr(a.as_i32x16(), i32x16::splat(IMM8.min(31) as i32));
    let zero: i32x16 = i32x16::splat(0);
    transmute(simd_select_bitmask(k, r, zero))
19486}
19487
19488/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19489///
19490/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srai_epi32&expand=5431)
19491#[inline]
19492#[target_feature(enable = "avx512f,avx512vl")]
19493#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19494#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
19495#[rustc_legacy_const_generics(3)]
19496pub unsafe fn _mm256_mask_srai_epi32<const IMM8: u32>(
19497 src: __m256i,
19498 k: __mmask8,
19499 a: __m256i,
19500) -> __m256i {
    let r: i32x8 = simd_shr(a.as_i32x8(), i32x8::splat(IMM8.min(31) as i32));
    transmute(simd_select_bitmask(k, r, src.as_i32x8()))
19503}
19504
19505/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19506///
19507/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srai_epi32&expand=5432)
19508#[inline]
19509#[target_feature(enable = "avx512f,avx512vl")]
19510#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19511#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
19512#[rustc_legacy_const_generics(2)]
19513pub unsafe fn _mm256_maskz_srai_epi32<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
    let r: i32x8 = simd_shr(a.as_i32x8(), i32x8::splat(IMM8.min(31) as i32));
    let zero: i32x8 = i32x8::splat(0);
    transmute(simd_select_bitmask(k, r, zero))
19517}
19518
19519/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19520///
19521/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srai_epi32&expand=5428)
19522#[inline]
19523#[target_feature(enable = "avx512f,avx512vl")]
19524#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19525#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
19526#[rustc_legacy_const_generics(3)]
19527pub unsafe fn _mm_mask_srai_epi32<const IMM8: u32>(
19528 src: __m128i,
19529 k: __mmask8,
19530 a: __m128i,
19531) -> __m128i {
    let r: i32x4 = simd_shr(a.as_i32x4(), i32x4::splat(IMM8.min(31) as i32));
    transmute(simd_select_bitmask(k, r, src.as_i32x4()))
19534}
19535
19536/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19537///
19538/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srai_epi32&expand=5429)
19539#[inline]
19540#[target_feature(enable = "avx512f,avx512vl")]
19541#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19542#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
19543#[rustc_legacy_const_generics(2)]
19544pub unsafe fn _mm_maskz_srai_epi32<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
    let r: i32x4 = simd_shr(a.as_i32x4(), i32x4::splat(IMM8.min(31) as i32));
    let zero: i32x4 = i32x4::splat(0);
    transmute(simd_select_bitmask(k, r, zero))
19548}
19549
19550/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
19551///
19552/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srai_epi64&expand=5445)
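///
/// A minimal usage sketch (illustrative only, not a doc-test). Shift counts larger than 63 behave as a shift by 63:
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_epi64(-256);
///     let r = _mm512_srai_epi64::<8>(a);
///     // Every lane now holds -256 >> 8 = -1.
/// }
/// ```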
19553#[inline]
19554#[target_feature(enable = "avx512f")]
19555#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19556#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
19557#[rustc_legacy_const_generics(1)]
19558pub unsafe fn _mm512_srai_epi64<const IMM8: u32>(a: __m512i) -> __m512i {
19559 static_assert_uimm_bits!(IMM8, 8);
    transmute(simd_shr(a.as_i64x8(), i64x8::splat(IMM8.min(63) as i64)))
19561}
19562
19563/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19564///
19565/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srai_epi64&expand=5443)
19566#[inline]
19567#[target_feature(enable = "avx512f")]
19568#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19569#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
19570#[rustc_legacy_const_generics(3)]
19571pub unsafe fn _mm512_mask_srai_epi64<const IMM8: u32>(
19572 src: __m512i,
19573 k: __mmask8,
19574 a: __m512i,
19575) -> __m512i {
19576 static_assert_uimm_bits!(IMM8, 8);
    let shf: i64x8 = simd_shr(a.as_i64x8(), i64x8::splat(IMM8.min(63) as i64));
    transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
19579}
19580
19581/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19582///
19583/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srai_epi64&expand=5444)
19584#[inline]
19585#[target_feature(enable = "avx512f")]
19586#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19587#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
19588#[rustc_legacy_const_generics(2)]
19589pub unsafe fn _mm512_maskz_srai_epi64<const IMM8: u32>(k: __mmask8, a: __m512i) -> __m512i {
19590 static_assert_uimm_bits!(IMM8, 8);
    let shf: i64x8 = simd_shr(a.as_i64x8(), i64x8::splat(IMM8.min(63) as i64));
    let zero: i64x8 = i64x8::splat(0);
    transmute(simd_select_bitmask(k, shf, zero))
19594}
19595
19596/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
19597///
19598/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srai_epi64&expand=5442)
19599#[inline]
19600#[target_feature(enable = "avx512f,avx512vl")]
19601#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19602#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
19603#[rustc_legacy_const_generics(1)]
19604pub unsafe fn _mm256_srai_epi64<const IMM8: u32>(a: __m256i) -> __m256i {
19605 static_assert_uimm_bits!(IMM8, 8);
    transmute(simd_shr(a.as_i64x4(), i64x4::splat(IMM8.min(63) as i64)))
19607}
19608
19609/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19610///
19611/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srai_epi64&expand=5440)
19612#[inline]
19613#[target_feature(enable = "avx512f,avx512vl")]
19614#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19615#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
19616#[rustc_legacy_const_generics(3)]
19617pub unsafe fn _mm256_mask_srai_epi64<const IMM8: u32>(
19618 src: __m256i,
19619 k: __mmask8,
19620 a: __m256i,
19621) -> __m256i {
19622 static_assert_uimm_bits!(IMM8, 8);
    let shf: i64x4 = simd_shr(a.as_i64x4(), i64x4::splat(IMM8.min(63) as i64));
    transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
19625}
19626
19627/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19628///
19629/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srai_epi64&expand=5441)
19630#[inline]
19631#[target_feature(enable = "avx512f,avx512vl")]
19632#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19633#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
19634#[rustc_legacy_const_generics(2)]
19635pub unsafe fn _mm256_maskz_srai_epi64<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
19636 static_assert_uimm_bits!(IMM8, 8);
    let shf: i64x4 = simd_shr(a.as_i64x4(), i64x4::splat(IMM8.min(63) as i64));
    let zero: i64x4 = i64x4::splat(0);
    transmute(simd_select_bitmask(k, shf, zero))
19640}
19641
19642/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
19643///
19644/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srai_epi64&expand=5439)
19645#[inline]
19646#[target_feature(enable = "avx512f,avx512vl")]
19647#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19648#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
19649#[rustc_legacy_const_generics(1)]
19650pub unsafe fn _mm_srai_epi64<const IMM8: u32>(a: __m128i) -> __m128i {
19651 static_assert_uimm_bits!(IMM8, 8);
    transmute(simd_shr(a.as_i64x2(), i64x2::splat(IMM8.min(63) as i64)))
19653}
19654
19655/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19656///
19657/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srai_epi64&expand=5437)
19658#[inline]
19659#[target_feature(enable = "avx512f,avx512vl")]
19660#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19661#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
19662#[rustc_legacy_const_generics(3)]
19663pub unsafe fn _mm_mask_srai_epi64<const IMM8: u32>(
19664 src: __m128i,
19665 k: __mmask8,
19666 a: __m128i,
19667) -> __m128i {
19668 static_assert_uimm_bits!(IMM8, 8);
    let shf: i64x2 = simd_shr(a.as_i64x2(), i64x2::splat(IMM8.min(63) as i64));
    transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
19671}
19672
19673/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19674///
19675/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srai_epi64&expand=5438)
19676#[inline]
19677#[target_feature(enable = "avx512f,avx512vl")]
19678#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19679#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
19680#[rustc_legacy_const_generics(2)]
19681pub unsafe fn _mm_maskz_srai_epi64<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
19682 static_assert_uimm_bits!(IMM8, 8);
    let shf: i64x2 = simd_shr(a.as_i64x2(), i64x2::splat(IMM8.min(63) as i64));
    let zero: i64x2 = i64x2::splat(0);
    transmute(simd_select_bitmask(k, shf, zero))
19686}
19687
19688/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
19689///
19690/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srav_epi32&expand=5465)
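///
/// A minimal usage sketch (illustrative only, not a doc-test). Each lane uses its own shift count:
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_epi32(-8);
///     let count = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
///     let r = _mm512_srav_epi32(a, count);
///     // Lane 0 stays -8, lane 1 holds -4, lane 2 holds -2, lanes 3 and beyond hold -1.
/// }
/// ```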
19691#[inline]
19692#[target_feature(enable = "avx512f")]
19693#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19694#[cfg_attr(test, assert_instr(vpsravd))]
19695pub unsafe fn _mm512_srav_epi32(a: __m512i, count: __m512i) -> __m512i {
19696 transmute(src:vpsravd(a:a.as_i32x16(), count:count.as_i32x16()))
19697}
19698
19699/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19700///
19701/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srav_epi32&expand=5463)
19702#[inline]
19703#[target_feature(enable = "avx512f")]
19704#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19705#[cfg_attr(test, assert_instr(vpsravd))]
19706pub unsafe fn _mm512_mask_srav_epi32(
19707 src: __m512i,
19708 k: __mmask16,
19709 a: __m512i,
19710 count: __m512i,
19711) -> __m512i {
19712 let shf: i32x16 = _mm512_srav_epi32(a, count).as_i32x16();
19713 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i32x16()))
19714}
19715
19716/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19717///
19718/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srav_epi32&expand=5464)
19719#[inline]
19720#[target_feature(enable = "avx512f")]
19721#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19722#[cfg_attr(test, assert_instr(vpsravd))]
19723pub unsafe fn _mm512_maskz_srav_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
19724 let shf: i32x16 = _mm512_srav_epi32(a, count).as_i32x16();
19725 let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
19726 transmute(src:simd_select_bitmask(m:k, yes:shf, no:zero))
19727}
19728
19729/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19730///
19731/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srav_epi32&expand=5460)
19732#[inline]
19733#[target_feature(enable = "avx512f,avx512vl")]
19734#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19735#[cfg_attr(test, assert_instr(vpsravd))]
19736pub unsafe fn _mm256_mask_srav_epi32(
19737 src: __m256i,
19738 k: __mmask8,
19739 a: __m256i,
19740 count: __m256i,
19741) -> __m256i {
19742 let shf: i32x8 = _mm256_srav_epi32(a, count).as_i32x8();
19743 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i32x8()))
19744}
19745
19746/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19747///
19748/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srav_epi32&expand=5461)
19749#[inline]
19750#[target_feature(enable = "avx512f,avx512vl")]
19751#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19752#[cfg_attr(test, assert_instr(vpsravd))]
19753pub unsafe fn _mm256_maskz_srav_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
19754 let shf: i32x8 = _mm256_srav_epi32(a, count).as_i32x8();
19755 let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
19756 transmute(src:simd_select_bitmask(m:k, yes:shf, no:zero))
19757}
19758
19759/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19760///
19761/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srav_epi32&expand=5457)
19762#[inline]
19763#[target_feature(enable = "avx512f,avx512vl")]
19764#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19765#[cfg_attr(test, assert_instr(vpsravd))]
19766pub unsafe fn _mm_mask_srav_epi32(
19767 src: __m128i,
19768 k: __mmask8,
19769 a: __m128i,
19770 count: __m128i,
19771) -> __m128i {
19772 let shf: i32x4 = _mm_srav_epi32(a, count).as_i32x4();
19773 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i32x4()))
19774}
19775
19776/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19777///
19778/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srav_epi32&expand=5458)
19779#[inline]
19780#[target_feature(enable = "avx512f,avx512vl")]
19781#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19782#[cfg_attr(test, assert_instr(vpsravd))]
19783pub unsafe fn _mm_maskz_srav_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
19784 let shf: i32x4 = _mm_srav_epi32(a, count).as_i32x4();
19785 let zero: i32x4 = _mm_setzero_si128().as_i32x4();
19786 transmute(src:simd_select_bitmask(m:k, yes:shf, no:zero))
19787}
19788
19789/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
19790///
19791/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srav_epi64&expand=5474)
19792#[inline]
19793#[target_feature(enable = "avx512f")]
19794#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19795#[cfg_attr(test, assert_instr(vpsravq))]
19796pub unsafe fn _mm512_srav_epi64(a: __m512i, count: __m512i) -> __m512i {
19797 transmute(src:vpsravq(a:a.as_i64x8(), count:count.as_i64x8()))
19798}
19799
19800/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19801///
19802/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srav_epi64&expand=5472)
19803#[inline]
19804#[target_feature(enable = "avx512f")]
19805#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19806#[cfg_attr(test, assert_instr(vpsravq))]
19807pub unsafe fn _mm512_mask_srav_epi64(
19808 src: __m512i,
19809 k: __mmask8,
19810 a: __m512i,
19811 count: __m512i,
19812) -> __m512i {
19813 let shf: i64x8 = _mm512_srav_epi64(a, count).as_i64x8();
19814 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i64x8()))
19815}
19816
19817/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19818///
19819/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srav_epi64&expand=5473)
19820#[inline]
19821#[target_feature(enable = "avx512f")]
19822#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19823#[cfg_attr(test, assert_instr(vpsravq))]
19824pub unsafe fn _mm512_maskz_srav_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
19825 let shf: i64x8 = _mm512_srav_epi64(a, count).as_i64x8();
19826 let zero: i64x8 = _mm512_setzero_si512().as_i64x8();
19827 transmute(src:simd_select_bitmask(m:k, yes:shf, no:zero))
19828}
19829
19830/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
19831///
19832/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srav_epi64&expand=5471)
19833#[inline]
19834#[target_feature(enable = "avx512f,avx512vl")]
19835#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19836#[cfg_attr(test, assert_instr(vpsravq))]
19837pub unsafe fn _mm256_srav_epi64(a: __m256i, count: __m256i) -> __m256i {
19838 transmute(src:vpsravq256(a:a.as_i64x4(), count:count.as_i64x4()))
19839}
19840
19841/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19842///
19843/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srav_epi64&expand=5469)
19844#[inline]
19845#[target_feature(enable = "avx512f,avx512vl")]
19846#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19847#[cfg_attr(test, assert_instr(vpsravq))]
19848pub unsafe fn _mm256_mask_srav_epi64(
19849 src: __m256i,
19850 k: __mmask8,
19851 a: __m256i,
19852 count: __m256i,
19853) -> __m256i {
19854 let shf: i64x4 = _mm256_srav_epi64(a, count).as_i64x4();
19855 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i64x4()))
19856}
19857
19858/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19859///
19860/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srav_epi64&expand=5470)
19861#[inline]
19862#[target_feature(enable = "avx512f,avx512vl")]
19863#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19864#[cfg_attr(test, assert_instr(vpsravq))]
19865pub unsafe fn _mm256_maskz_srav_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
19866 let shf: i64x4 = _mm256_srav_epi64(a, count).as_i64x4();
19867 let zero: i64x4 = _mm256_setzero_si256().as_i64x4();
19868 transmute(src:simd_select_bitmask(m:k, yes:shf, no:zero))
19869}
19870
19871/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
19872///
19873/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srav_epi64&expand=5468)
19874#[inline]
19875#[target_feature(enable = "avx512f,avx512vl")]
19876#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19877#[cfg_attr(test, assert_instr(vpsravq))]
19878pub unsafe fn _mm_srav_epi64(a: __m128i, count: __m128i) -> __m128i {
19879 transmute(src:vpsravq128(a:a.as_i64x2(), count:count.as_i64x2()))
19880}
19881
19882/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19883///
19884/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srav_epi64&expand=5466)
19885#[inline]
19886#[target_feature(enable = "avx512f,avx512vl")]
19887#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19888#[cfg_attr(test, assert_instr(vpsravq))]
19889pub unsafe fn _mm_mask_srav_epi64(
19890 src: __m128i,
19891 k: __mmask8,
19892 a: __m128i,
19893 count: __m128i,
19894) -> __m128i {
19895 let shf: i64x2 = _mm_srav_epi64(a, count).as_i64x2();
19896 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i64x2()))
19897}
19898
19899/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19900///
19901/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srav_epi64&expand=5467)
19902#[inline]
19903#[target_feature(enable = "avx512f,avx512vl")]
19904#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19905#[cfg_attr(test, assert_instr(vpsravq))]
19906pub unsafe fn _mm_maskz_srav_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
19907 let shf: i64x2 = _mm_srav_epi64(a, count).as_i64x2();
19908 let zero: i64x2 = _mm_setzero_si128().as_i64x2();
19909 transmute(src:simd_select_bitmask(m:k, yes:shf, no:zero))
19910}
19911
19912/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
19913///
19914/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rolv_epi32&expand=4703)
19915#[inline]
19916#[target_feature(enable = "avx512f")]
19917#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19918#[cfg_attr(test, assert_instr(vprolvd))]
19919pub unsafe fn _mm512_rolv_epi32(a: __m512i, b: __m512i) -> __m512i {
19920 transmute(src:vprolvd(a:a.as_i32x16(), b:b.as_i32x16()))
19921}
19922
19923/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19924///
19925/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rolv_epi32&expand=4701)
19926#[inline]
19927#[target_feature(enable = "avx512f")]
19928#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19929#[cfg_attr(test, assert_instr(vprolvd))]
19930pub unsafe fn _mm512_mask_rolv_epi32(
19931 src: __m512i,
19932 k: __mmask16,
19933 a: __m512i,
19934 b: __m512i,
19935) -> __m512i {
19936 let rol: i32x16 = _mm512_rolv_epi32(a, b).as_i32x16();
19937 transmute(src:simd_select_bitmask(m:k, yes:rol, no:src.as_i32x16()))
19938}
19939
19940/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19941///
19942/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rolv_epi32&expand=4702)
19943#[inline]
19944#[target_feature(enable = "avx512f")]
19945#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19946#[cfg_attr(test, assert_instr(vprolvd))]
19947pub unsafe fn _mm512_maskz_rolv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
19948 let rol: i32x16 = _mm512_rolv_epi32(a, b).as_i32x16();
19949 let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
19950 transmute(src:simd_select_bitmask(m:k, yes:rol, no:zero))
19951}
19952
19953/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
19954///
19955/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rolv_epi32&expand=4700)
19956#[inline]
19957#[target_feature(enable = "avx512f,avx512vl")]
19958#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19959#[cfg_attr(test, assert_instr(vprolvd))]
19960pub unsafe fn _mm256_rolv_epi32(a: __m256i, b: __m256i) -> __m256i {
19961 transmute(src:vprolvd256(a:a.as_i32x8(), b:b.as_i32x8()))
19962}
19963
19964/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19965///
19966/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rolv_epi3&expand=4698)
19967#[inline]
19968#[target_feature(enable = "avx512f,avx512vl")]
19969#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19970#[cfg_attr(test, assert_instr(vprolvd))]
19971pub unsafe fn _mm256_mask_rolv_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
19972 let rol: i32x8 = _mm256_rolv_epi32(a, b).as_i32x8();
19973 transmute(src:simd_select_bitmask(m:k, yes:rol, no:src.as_i32x8()))
19974}
19975
19976/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19977///
19978/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rolv_epi32&expand=4699)
19979#[inline]
19980#[target_feature(enable = "avx512f,avx512vl")]
19981#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19982#[cfg_attr(test, assert_instr(vprolvd))]
19983pub unsafe fn _mm256_maskz_rolv_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
19984 let rol: i32x8 = _mm256_rolv_epi32(a, b).as_i32x8();
19985 let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
19986 transmute(src:simd_select_bitmask(m:k, yes:rol, no:zero))
19987}
19988
19989/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
19990///
19991/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rolv_epi32&expand=4697)
19992#[inline]
19993#[target_feature(enable = "avx512f,avx512vl")]
19994#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19995#[cfg_attr(test, assert_instr(vprolvd))]
19996pub unsafe fn _mm_rolv_epi32(a: __m128i, b: __m128i) -> __m128i {
19997 transmute(src:vprolvd128(a:a.as_i32x4(), b:b.as_i32x4()))
19998}
19999
20000/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20001///
20002/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rolv_epi32&expand=4695)
20003#[inline]
20004#[target_feature(enable = "avx512f,avx512vl")]
20005#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20006#[cfg_attr(test, assert_instr(vprolvd))]
20007pub unsafe fn _mm_mask_rolv_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
20008 let rol: i32x4 = _mm_rolv_epi32(a, b).as_i32x4();
20009 transmute(src:simd_select_bitmask(m:k, yes:rol, no:src.as_i32x4()))
20010}
20011
20012/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20013///
20014/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rolv_epi32&expand=4696)
20015#[inline]
20016#[target_feature(enable = "avx512f,avx512vl")]
20017#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20018#[cfg_attr(test, assert_instr(vprolvd))]
20019pub unsafe fn _mm_maskz_rolv_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
20020 let rol: i32x4 = _mm_rolv_epi32(a, b).as_i32x4();
20021 let zero: i32x4 = _mm_setzero_si128().as_i32x4();
20022 transmute(src:simd_select_bitmask(m:k, yes:rol, no:zero))
20023}
20024
20025/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
20026///
20027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rorv_epi32&expand=4739)
20028#[inline]
20029#[target_feature(enable = "avx512f")]
20030#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20031#[cfg_attr(test, assert_instr(vprorvd))]
20032pub unsafe fn _mm512_rorv_epi32(a: __m512i, b: __m512i) -> __m512i {
20033 transmute(src:vprorvd(a:a.as_i32x16(), b:b.as_i32x16()))
20034}
20035
20036/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20037///
20038/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rorv_epi32&expand=4737)
20039#[inline]
20040#[target_feature(enable = "avx512f")]
20041#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20042#[cfg_attr(test, assert_instr(vprorvd))]
20043pub unsafe fn _mm512_mask_rorv_epi32(
20044 src: __m512i,
20045 k: __mmask16,
20046 a: __m512i,
20047 b: __m512i,
20048) -> __m512i {
20049 let ror: i32x16 = _mm512_rorv_epi32(a, b).as_i32x16();
20050 transmute(src:simd_select_bitmask(m:k, yes:ror, no:src.as_i32x16()))
20051}
20052
20053/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20054///
20055/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rorv_epi32&expand=4738)
20056#[inline]
20057#[target_feature(enable = "avx512f")]
20058#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20059#[cfg_attr(test, assert_instr(vprorvd))]
20060pub unsafe fn _mm512_maskz_rorv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
20061 let ror: i32x16 = _mm512_rorv_epi32(a, b).as_i32x16();
20062 let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
20063 transmute(src:simd_select_bitmask(m:k, yes:ror, no:zero))
20064}
20065
20066/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
20067///
20068/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rorv_epi32&expand=4736)
20069#[inline]
20070#[target_feature(enable = "avx512f,avx512vl")]
20071#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20072#[cfg_attr(test, assert_instr(vprorvd))]
20073pub unsafe fn _mm256_rorv_epi32(a: __m256i, b: __m256i) -> __m256i {
20074 transmute(src:vprorvd256(a:a.as_i32x8(), b:b.as_i32x8()))
20075}
20076
20077/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20078///
20079/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rorv_epi32&expand=4734)
20080#[inline]
20081#[target_feature(enable = "avx512f,avx512vl")]
20082#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20083#[cfg_attr(test, assert_instr(vprorvd))]
20084pub unsafe fn _mm256_mask_rorv_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
20085 let ror: i32x8 = _mm256_rorv_epi32(a, b).as_i32x8();
20086 transmute(src:simd_select_bitmask(m:k, yes:ror, no:src.as_i32x8()))
20087}
20088
20089/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20090///
20091/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rorv_epi32&expand=4735)
20092#[inline]
20093#[target_feature(enable = "avx512f,avx512vl")]
20094#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20095#[cfg_attr(test, assert_instr(vprorvd))]
20096pub unsafe fn _mm256_maskz_rorv_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
20097 let ror: i32x8 = _mm256_rorv_epi32(a, b).as_i32x8();
20098 let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
20099 transmute(src:simd_select_bitmask(m:k, yes:ror, no:zero))
20100}
20101
20102/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
20103///
20104/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rorv_epi32&expand=4733)
20105#[inline]
20106#[target_feature(enable = "avx512f,avx512vl")]
20107#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20108#[cfg_attr(test, assert_instr(vprorvd))]
20109pub unsafe fn _mm_rorv_epi32(a: __m128i, b: __m128i) -> __m128i {
20110 transmute(src:vprorvd128(a:a.as_i32x4(), b:b.as_i32x4()))
20111}
20112
20113/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20114///
20115/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rorv_epi32&expand=4731)
20116#[inline]
20117#[target_feature(enable = "avx512f,avx512vl")]
20118#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20119#[cfg_attr(test, assert_instr(vprorvd))]
20120pub unsafe fn _mm_mask_rorv_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
20121 let ror: i32x4 = _mm_rorv_epi32(a, b).as_i32x4();
20122 transmute(src:simd_select_bitmask(m:k, yes:ror, no:src.as_i32x4()))
20123}
20124
20125/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20126///
20127/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rorv_epi32&expand=4732)
20128#[inline]
20129#[target_feature(enable = "avx512f,avx512vl")]
20130#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20131#[cfg_attr(test, assert_instr(vprorvd))]
20132pub unsafe fn _mm_maskz_rorv_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
20133 let ror: i32x4 = _mm_rorv_epi32(a, b).as_i32x4();
20134 let zero: i32x4 = _mm_setzero_si128().as_i32x4();
20135 transmute(src:simd_select_bitmask(m:k, yes:ror, no:zero))
20136}
20137
20138/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
20139///
20140/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rolv_epi64&expand=4712)
20141#[inline]
20142#[target_feature(enable = "avx512f")]
20143#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20144#[cfg_attr(test, assert_instr(vprolvq))]
20145pub unsafe fn _mm512_rolv_epi64(a: __m512i, b: __m512i) -> __m512i {
20146 transmute(src:vprolvq(a:a.as_i64x8(), b:b.as_i64x8()))
20147}
20148
20149/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20150///
20151/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rolv_epi64&expand=4710)
20152#[inline]
20153#[target_feature(enable = "avx512f")]
20154#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20155#[cfg_attr(test, assert_instr(vprolvq))]
20156pub unsafe fn _mm512_mask_rolv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
20157 let rol: i64x8 = _mm512_rolv_epi64(a, b).as_i64x8();
20158 transmute(src:simd_select_bitmask(m:k, yes:rol, no:src.as_i64x8()))
20159}
20160
20161/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20162///
20163/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rolv_epi64&expand=4711)
20164#[inline]
20165#[target_feature(enable = "avx512f")]
20166#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20167#[cfg_attr(test, assert_instr(vprolvq))]
20168pub unsafe fn _mm512_maskz_rolv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
20169 let rol: i64x8 = _mm512_rolv_epi64(a, b).as_i64x8();
20170 let zero: i64x8 = _mm512_setzero_si512().as_i64x8();
20171 transmute(src:simd_select_bitmask(m:k, yes:rol, no:zero))
20172}
20173
20174/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
20175///
20176/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rolv_epi64&expand=4709)
20177#[inline]
20178#[target_feature(enable = "avx512f,avx512vl")]
20179#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20180#[cfg_attr(test, assert_instr(vprolvq))]
20181pub unsafe fn _mm256_rolv_epi64(a: __m256i, b: __m256i) -> __m256i {
20182 transmute(src:vprolvq256(a:a.as_i64x4(), b:b.as_i64x4()))
20183}
20184
20185/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20186///
20187/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rolv_epi64&expand=4707)
20188#[inline]
20189#[target_feature(enable = "avx512f,avx512vl")]
20190#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20191#[cfg_attr(test, assert_instr(vprolvq))]
20192pub unsafe fn _mm256_mask_rolv_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
20193 let rol: i64x4 = _mm256_rolv_epi64(a, b).as_i64x4();
20194 transmute(src:simd_select_bitmask(m:k, yes:rol, no:src.as_i64x4()))
20195}
20196
20197/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20198///
20199/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rolv_epi64&expand=4708)
20200#[inline]
20201#[target_feature(enable = "avx512f,avx512vl")]
20202#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20203#[cfg_attr(test, assert_instr(vprolvq))]
20204pub unsafe fn _mm256_maskz_rolv_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
20205 let rol: i64x4 = _mm256_rolv_epi64(a, b).as_i64x4();
20206 let zero: i64x4 = _mm256_setzero_si256().as_i64x4();
20207 transmute(src:simd_select_bitmask(m:k, yes:rol, no:zero))
20208}
20209
20210/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
20211///
20212/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rolv_epi64&expand=4706)
20213#[inline]
20214#[target_feature(enable = "avx512f,avx512vl")]
20215#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20216#[cfg_attr(test, assert_instr(vprolvq))]
20217pub unsafe fn _mm_rolv_epi64(a: __m128i, b: __m128i) -> __m128i {
20218 transmute(src:vprolvq128(a:a.as_i64x2(), b:b.as_i64x2()))
20219}
20220
20221/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20222///
20223/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rolv_epi64&expand=4704)
20224#[inline]
20225#[target_feature(enable = "avx512f,avx512vl")]
20226#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20227#[cfg_attr(test, assert_instr(vprolvq))]
20228pub unsafe fn _mm_mask_rolv_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
20229 let rol: i64x2 = _mm_rolv_epi64(a, b).as_i64x2();
20230 transmute(src:simd_select_bitmask(m:k, yes:rol, no:src.as_i64x2()))
20231}
20232
20233/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20234///
20235/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rolv_epi64&expand=4705)
20236#[inline]
20237#[target_feature(enable = "avx512f,avx512vl")]
20238#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20239#[cfg_attr(test, assert_instr(vprolvq))]
20240pub unsafe fn _mm_maskz_rolv_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
20241 let rol: i64x2 = _mm_rolv_epi64(a, b).as_i64x2();
20242 let zero: i64x2 = _mm_setzero_si128().as_i64x2();
20243 transmute(src:simd_select_bitmask(m:k, yes:rol, no:zero))
20244}
20245
20246/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
20247///
20248/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rorv_epi64&expand=4748)
20249#[inline]
20250#[target_feature(enable = "avx512f")]
20251#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20252#[cfg_attr(test, assert_instr(vprorvq))]
20253pub unsafe fn _mm512_rorv_epi64(a: __m512i, b: __m512i) -> __m512i {
20254 transmute(src:vprorvq(a:a.as_i64x8(), b:b.as_i64x8()))
20255}
20256
20257/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20258///
20259/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rorv_epi64&expand=4746)
20260#[inline]
20261#[target_feature(enable = "avx512f")]
20262#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20263#[cfg_attr(test, assert_instr(vprorvq))]
20264pub unsafe fn _mm512_mask_rorv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
20265 let ror: i64x8 = _mm512_rorv_epi64(a, b).as_i64x8();
20266 transmute(src:simd_select_bitmask(m:k, yes:ror, no:src.as_i64x8()))
20267}
20268
20269/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20270///
20271/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rorv_epi64&expand=4747)
20272#[inline]
20273#[target_feature(enable = "avx512f")]
20274#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20275#[cfg_attr(test, assert_instr(vprorvq))]
20276pub unsafe fn _mm512_maskz_rorv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
20277 let ror: i64x8 = _mm512_rorv_epi64(a, b).as_i64x8();
20278 let zero: i64x8 = _mm512_setzero_si512().as_i64x8();
20279 transmute(src:simd_select_bitmask(m:k, yes:ror, no:zero))
20280}
20281
20282/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
20283///
20284/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rorv_epi64&expand=4745)
20285#[inline]
20286#[target_feature(enable = "avx512f,avx512vl")]
20287#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20288#[cfg_attr(test, assert_instr(vprorvq))]
20289pub unsafe fn _mm256_rorv_epi64(a: __m256i, b: __m256i) -> __m256i {
20290 transmute(src:vprorvq256(a:a.as_i64x4(), b:b.as_i64x4()))
20291}
20292
20293/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20294///
20295/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rorv_epi64&expand=4743)
20296#[inline]
20297#[target_feature(enable = "avx512f,avx512vl")]
20298#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20299#[cfg_attr(test, assert_instr(vprorvq))]
20300pub unsafe fn _mm256_mask_rorv_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
20301 let ror: i64x4 = _mm256_rorv_epi64(a, b).as_i64x4();
20302 transmute(src:simd_select_bitmask(m:k, yes:ror, no:src.as_i64x4()))
20303}
20304
20305/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20306///
20307/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rorv_epi64&expand=4744)
20308#[inline]
20309#[target_feature(enable = "avx512f,avx512vl")]
20310#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20311#[cfg_attr(test, assert_instr(vprorvq))]
20312pub unsafe fn _mm256_maskz_rorv_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
20313 let ror: i64x4 = _mm256_rorv_epi64(a, b).as_i64x4();
20314 let zero: i64x4 = _mm256_setzero_si256().as_i64x4();
20315 transmute(src:simd_select_bitmask(m:k, yes:ror, no:zero))
20316}
20317
20318/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
20319///
20320/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rorv_epi64&expand=4742)
20321#[inline]
20322#[target_feature(enable = "avx512f,avx512vl")]
20323#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20324#[cfg_attr(test, assert_instr(vprorvq))]
20325pub unsafe fn _mm_rorv_epi64(a: __m128i, b: __m128i) -> __m128i {
20326 transmute(src:vprorvq128(a:a.as_i64x2(), b:b.as_i64x2()))
20327}
20328
20329/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20330///
20331/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rorv_epi64&expand=4740)
20332#[inline]
20333#[target_feature(enable = "avx512f,avx512vl")]
20334#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20335#[cfg_attr(test, assert_instr(vprorvq))]
20336pub unsafe fn _mm_mask_rorv_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
20337 let ror: i64x2 = _mm_rorv_epi64(a, b).as_i64x2();
20338 transmute(src:simd_select_bitmask(m:k, yes:ror, no:src.as_i64x2()))
20339}
20340
20341/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20342///
20343/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rorv_epi64&expand=4741)
20344#[inline]
20345#[target_feature(enable = "avx512f,avx512vl")]
20346#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20347#[cfg_attr(test, assert_instr(vprorvq))]
20348pub unsafe fn _mm_maskz_rorv_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
20349 let ror: i64x2 = _mm_rorv_epi64(a, b).as_i64x2();
20350 let zero: i64x2 = _mm_setzero_si128().as_i64x2();
20351 transmute(src:simd_select_bitmask(m:k, yes:ror, no:zero))
20352}
20353
20354/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
20355///
20356/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sllv_epi32&expand=5342)
20357#[inline]
20358#[target_feature(enable = "avx512f")]
20359#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20360#[cfg_attr(test, assert_instr(vpsllvd))]
20361pub unsafe fn _mm512_sllv_epi32(a: __m512i, count: __m512i) -> __m512i {
20362 transmute(src:vpsllvd(a:a.as_i32x16(), b:count.as_i32x16()))
20363}
20364
20365/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20366///
20367/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sllv_epi32&expand=5340)
20368#[inline]
20369#[target_feature(enable = "avx512f")]
20370#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20371#[cfg_attr(test, assert_instr(vpsllvd))]
20372pub unsafe fn _mm512_mask_sllv_epi32(
20373 src: __m512i,
20374 k: __mmask16,
20375 a: __m512i,
20376 count: __m512i,
20377) -> __m512i {
20378 let shf: i32x16 = _mm512_sllv_epi32(a, count).as_i32x16();
20379 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i32x16()))
20380}
20381
20382/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20383///
20384/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sllv_epi32&expand=5341)
20385#[inline]
20386#[target_feature(enable = "avx512f")]
20387#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20388#[cfg_attr(test, assert_instr(vpsllvd))]
20389pub unsafe fn _mm512_maskz_sllv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
20390 let shf: i32x16 = _mm512_sllv_epi32(a, count).as_i32x16();
20391 let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
20392 transmute(src:simd_select_bitmask(m:k, yes:shf, no:zero))
20393}
20394
20395/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20396///
20397/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sllv_epi32&expand=5337)
20398#[inline]
20399#[target_feature(enable = "avx512f,avx512vl")]
20400#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20401#[cfg_attr(test, assert_instr(vpsllvd))]
20402pub unsafe fn _mm256_mask_sllv_epi32(
20403 src: __m256i,
20404 k: __mmask8,
20405 a: __m256i,
20406 count: __m256i,
20407) -> __m256i {
20408 let shf: i32x8 = _mm256_sllv_epi32(a, count).as_i32x8();
20409 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i32x8()))
20410}
20411
20412/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20413///
20414/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sllv_epi32&expand=5338)
20415#[inline]
20416#[target_feature(enable = "avx512f,avx512vl")]
20417#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20418#[cfg_attr(test, assert_instr(vpsllvd))]
20419pub unsafe fn _mm256_maskz_sllv_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
20420 let shf: i32x8 = _mm256_sllv_epi32(a, count).as_i32x8();
20421 let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
20422 transmute(src:simd_select_bitmask(m:k, yes:shf, no:zero))
20423}
20424
20425/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20426///
20427/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sllv_epi32&expand=5334)
20428#[inline]
20429#[target_feature(enable = "avx512f,avx512vl")]
20430#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20431#[cfg_attr(test, assert_instr(vpsllvd))]
20432pub unsafe fn _mm_mask_sllv_epi32(
20433 src: __m128i,
20434 k: __mmask8,
20435 a: __m128i,
20436 count: __m128i,
20437) -> __m128i {
20438 let shf: i32x4 = _mm_sllv_epi32(a, count).as_i32x4();
20439 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i32x4()))
20440}
20441
20442/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20443///
20444/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sllv_epi32&expand=5335)
20445#[inline]
20446#[target_feature(enable = "avx512f,avx512vl")]
20447#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20448#[cfg_attr(test, assert_instr(vpsllvd))]
20449pub unsafe fn _mm_maskz_sllv_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20450 let shf: i32x4 = _mm_sllv_epi32(a, count).as_i32x4();
20451 let zero: i32x4 = _mm_setzero_si128().as_i32x4();
20452 transmute(src:simd_select_bitmask(m:k, yes:shf, no:zero))
20453}
20454
20455/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
20456///
20457/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srlv_epi32&expand=5554)
20458#[inline]
20459#[target_feature(enable = "avx512f")]
20460#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20461#[cfg_attr(test, assert_instr(vpsrlvd))]
20462pub unsafe fn _mm512_srlv_epi32(a: __m512i, count: __m512i) -> __m512i {
20463 transmute(src:vpsrlvd(a:a.as_i32x16(), b:count.as_i32x16()))
20464}
20465
20466/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20467///
20468/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srlv_epi32&expand=5552)
20469#[inline]
20470#[target_feature(enable = "avx512f")]
20471#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20472#[cfg_attr(test, assert_instr(vpsrlvd))]
20473pub unsafe fn _mm512_mask_srlv_epi32(
20474 src: __m512i,
20475 k: __mmask16,
20476 a: __m512i,
20477 count: __m512i,
20478) -> __m512i {
20479 let shf: i32x16 = _mm512_srlv_epi32(a, count).as_i32x16();
20480 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i32x16()))
20481}
20482
20483/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20484///
20485/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srlv_epi32&expand=5553)
20486#[inline]
20487#[target_feature(enable = "avx512f")]
20488#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20489#[cfg_attr(test, assert_instr(vpsrlvd))]
20490pub unsafe fn _mm512_maskz_srlv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
20491 let shf: i32x16 = _mm512_srlv_epi32(a, count).as_i32x16();
20492 let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
20493 transmute(src:simd_select_bitmask(m:k, yes:shf, no:zero))
20494}
20495
20496/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20497///
20498/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srlv_epi32&expand=5549)
20499#[inline]
20500#[target_feature(enable = "avx512f,avx512vl")]
20501#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20502#[cfg_attr(test, assert_instr(vpsrlvd))]
20503pub unsafe fn _mm256_mask_srlv_epi32(
20504 src: __m256i,
20505 k: __mmask8,
20506 a: __m256i,
20507 count: __m256i,
20508) -> __m256i {
20509 let shf: i32x8 = _mm256_srlv_epi32(a, count).as_i32x8();
20510 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i32x8()))
20511}
20512
20513/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20514///
20515/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srlv_epi32&expand=5550)
20516#[inline]
20517#[target_feature(enable = "avx512f,avx512vl")]
20518#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20519#[cfg_attr(test, assert_instr(vpsrlvd))]
20520pub unsafe fn _mm256_maskz_srlv_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
20521 let shf: i32x8 = _mm256_srlv_epi32(a, count).as_i32x8();
20522 let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
20523 transmute(src:simd_select_bitmask(m:k, yes:shf, no:zero))
20524}
20525
20526/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20527///
20528/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srlv_epi32&expand=5546)
20529#[inline]
20530#[target_feature(enable = "avx512f,avx512vl")]
20531#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20532#[cfg_attr(test, assert_instr(vpsrlvd))]
20533pub unsafe fn _mm_mask_srlv_epi32(
20534 src: __m128i,
20535 k: __mmask8,
20536 a: __m128i,
20537 count: __m128i,
20538) -> __m128i {
20539 let shf: i32x4 = _mm_srlv_epi32(a, count).as_i32x4();
20540 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i32x4()))
20541}
20542
20543/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20544///
20545/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srlv_epi32&expand=5547)
20546#[inline]
20547#[target_feature(enable = "avx512f,avx512vl")]
20548#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20549#[cfg_attr(test, assert_instr(vpsrlvd))]
20550pub unsafe fn _mm_maskz_srlv_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20551 let shf: i32x4 = _mm_srlv_epi32(a, count).as_i32x4();
20552 let zero: i32x4 = _mm_setzero_si128().as_i32x4();
20553 transmute(src:simd_select_bitmask(m:k, yes:shf, no:zero))
20554}
20555
20556/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
20557///
20558/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sllv_epi64&expand=5351)
20559#[inline]
20560#[target_feature(enable = "avx512f")]
20561#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20562#[cfg_attr(test, assert_instr(vpsllvq))]
20563pub unsafe fn _mm512_sllv_epi64(a: __m512i, count: __m512i) -> __m512i {
20564 transmute(src:vpsllvq(a:a.as_i64x8(), b:count.as_i64x8()))
20565}
20566
20567/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20568///
20569/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sllv_epi64&expand=5349)
20570#[inline]
20571#[target_feature(enable = "avx512f")]
20572#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20573#[cfg_attr(test, assert_instr(vpsllvq))]
20574pub unsafe fn _mm512_mask_sllv_epi64(
20575 src: __m512i,
20576 k: __mmask8,
20577 a: __m512i,
20578 count: __m512i,
20579) -> __m512i {
20580 let shf: i64x8 = _mm512_sllv_epi64(a, count).as_i64x8();
20581 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i64x8()))
20582}
20583
20584/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20585///
20586/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sllv_epi64&expand=5350)
20587#[inline]
20588#[target_feature(enable = "avx512f")]
20589#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20590#[cfg_attr(test, assert_instr(vpsllvq))]
20591pub unsafe fn _mm512_maskz_sllv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
20592 let shf: i64x8 = _mm512_sllv_epi64(a, count).as_i64x8();
20593 let zero: i64x8 = _mm512_setzero_si512().as_i64x8();
20594 transmute(src:simd_select_bitmask(m:k, yes:shf, no:zero))
20595}
20596
20597/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20598///
20599/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sllv_epi64&expand=5346)
20600#[inline]
20601#[target_feature(enable = "avx512f,avx512vl")]
20602#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20603#[cfg_attr(test, assert_instr(vpsllvq))]
20604pub unsafe fn _mm256_mask_sllv_epi64(
20605 src: __m256i,
20606 k: __mmask8,
20607 a: __m256i,
20608 count: __m256i,
20609) -> __m256i {
20610 let shf: i64x4 = _mm256_sllv_epi64(a, count).as_i64x4();
20611 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i64x4()))
20612}
20613
20614/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20615///
20616/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sllv_epi64&expand=5347)
20617#[inline]
20618#[target_feature(enable = "avx512f,avx512vl")]
20619#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20620#[cfg_attr(test, assert_instr(vpsllvq))]
pub unsafe fn _mm256_maskz_sllv_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
    let shf: i64x4 = _mm256_sllv_epi64(a, count).as_i64x4();
    let zero: i64x4 = _mm256_setzero_si256().as_i64x4();
    transmute(simd_select_bitmask(k, shf, zero))
}
20626
20627/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20628///
20629/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sllv_epi64&expand=5343)
20630#[inline]
20631#[target_feature(enable = "avx512f,avx512vl")]
20632#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20633#[cfg_attr(test, assert_instr(vpsllvq))]
pub unsafe fn _mm_mask_sllv_epi64(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
    count: __m128i,
) -> __m128i {
    let shf: i64x2 = _mm_sllv_epi64(a, count).as_i64x2();
    transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
}
20643
20644/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20645///
20646/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sllv_epi64&expand=5344)
20647#[inline]
20648#[target_feature(enable = "avx512f,avx512vl")]
20649#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20650#[cfg_attr(test, assert_instr(vpsllvq))]
pub unsafe fn _mm_maskz_sllv_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    let shf: i64x2 = _mm_sllv_epi64(a, count).as_i64x2();
    let zero: i64x2 = _mm_setzero_si128().as_i64x2();
    transmute(simd_select_bitmask(k, shf, zero))
}
20656
20657/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
20658///
20659/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srlv_epi64&expand=5563)
20660#[inline]
20661#[target_feature(enable = "avx512f")]
20662#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20663#[cfg_attr(test, assert_instr(vpsrlvq))]
pub unsafe fn _mm512_srlv_epi64(a: __m512i, count: __m512i) -> __m512i {
    transmute(vpsrlvq(a.as_i64x8(), count.as_i64x8()))
}
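
// NOTE: a minimal sketch for `_mm512_srlv_epi64` (illustrative values); the
// shift is logical, so zeros are shifted in regardless of sign:
//
//     let a = _mm512_set1_epi64(-8);       // 0xFFFF_FFFF_FFFF_FFF8 per lane
//     let count = _mm512_set1_epi64(1);
//     let r = _mm512_srlv_epi64(a, count); // 0x7FFF_FFFF_FFFF_FFFC, not -4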
20667
20668/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20669///
20670/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srlv_epi64&expand=5561)
20671#[inline]
20672#[target_feature(enable = "avx512f")]
20673#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20674#[cfg_attr(test, assert_instr(vpsrlvq))]
pub unsafe fn _mm512_mask_srlv_epi64(
    src: __m512i,
    k: __mmask8,
    a: __m512i,
    count: __m512i,
) -> __m512i {
    let shf: i64x8 = _mm512_srlv_epi64(a, count).as_i64x8();
    transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
}
20684
20685/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20686///
20687/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srlv_epi64&expand=5562)
20688#[inline]
20689#[target_feature(enable = "avx512f")]
20690#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20691#[cfg_attr(test, assert_instr(vpsrlvq))]
pub unsafe fn _mm512_maskz_srlv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
    let shf: i64x8 = _mm512_srlv_epi64(a, count).as_i64x8();
    let zero: i64x8 = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, shf, zero))
}
20697
20698/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20699///
20700/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srlv_epi64&expand=5558)
20701#[inline]
20702#[target_feature(enable = "avx512f,avx512vl")]
20703#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20704#[cfg_attr(test, assert_instr(vpsrlvq))]
pub unsafe fn _mm256_mask_srlv_epi64(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
    count: __m256i,
) -> __m256i {
    let shf: i64x4 = _mm256_srlv_epi64(a, count).as_i64x4();
    transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
}
20714
20715/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20716///
20717/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srlv_epi64&expand=5559)
20718#[inline]
20719#[target_feature(enable = "avx512f,avx512vl")]
20720#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20721#[cfg_attr(test, assert_instr(vpsrlvq))]
pub unsafe fn _mm256_maskz_srlv_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
    let shf: i64x4 = _mm256_srlv_epi64(a, count).as_i64x4();
    let zero: i64x4 = _mm256_setzero_si256().as_i64x4();
    transmute(simd_select_bitmask(k, shf, zero))
}
20727
20728/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20729///
20730/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srlv_epi64&expand=5555)
20731#[inline]
20732#[target_feature(enable = "avx512f,avx512vl")]
20733#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20734#[cfg_attr(test, assert_instr(vpsrlvq))]
pub unsafe fn _mm_mask_srlv_epi64(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
    count: __m128i,
) -> __m128i {
    let shf: i64x2 = _mm_srlv_epi64(a, count).as_i64x2();
    transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
}
20744
20745/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20746///
20747/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srlv_epi64&expand=5556)
20748#[inline]
20749#[target_feature(enable = "avx512f,avx512vl")]
20750#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20751#[cfg_attr(test, assert_instr(vpsrlvq))]
pub unsafe fn _mm_maskz_srlv_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    let shf: i64x2 = _mm_srlv_epi64(a, count).as_i64x2();
    let zero: i64x2 = _mm_setzero_si128().as_i64x2();
    transmute(simd_select_bitmask(k, shf, zero))
}
20757
20758/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst.
20759///
20760/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permute_ps&expand=4170)
20761#[inline]
20762#[target_feature(enable = "avx512f")]
20763#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20764#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
20765#[rustc_legacy_const_generics(1)]
pub unsafe fn _mm512_permute_ps<const MASK: i32>(a: __m512) -> __m512 {
    static_assert_uimm_bits!(MASK, 8);
    simd_shuffle!(
        a,
        a,
        [
            MASK as u32 & 0b11,
            (MASK as u32 >> 2) & 0b11,
            ((MASK as u32 >> 4) & 0b11),
            ((MASK as u32 >> 6) & 0b11),
            (MASK as u32 & 0b11) + 4,
            ((MASK as u32 >> 2) & 0b11) + 4,
            ((MASK as u32 >> 4) & 0b11) + 4,
            ((MASK as u32 >> 6) & 0b11) + 4,
            (MASK as u32 & 0b11) + 8,
            ((MASK as u32 >> 2) & 0b11) + 8,
            ((MASK as u32 >> 4) & 0b11) + 8,
            ((MASK as u32 >> 6) & 0b11) + 8,
            (MASK as u32 & 0b11) + 12,
            ((MASK as u32 >> 2) & 0b11) + 12,
            ((MASK as u32 >> 4) & 0b11) + 12,
            ((MASK as u32 >> 6) & 0b11) + 12,
        ],
    )
}
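
// NOTE: a sketch of the `_mm512_permute_ps` control byte (illustrative
// values): the same four 2-bit selectors are applied to every 128-bit lane,
// so MASK = 0b00_01_10_11 reverses each group of four elements:
//
//     let a = _mm512_setr_ps(0., 1., 2., 3., 4., 5., 6., 7.,
//                            8., 9., 10., 11., 12., 13., 14., 15.);
//     let r = _mm512_permute_ps::<0b00_01_10_11>(a);
//     // r = [3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12]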
20791
20792/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20793///
20794/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permute_ps&expand=4168)
20795#[inline]
20796#[target_feature(enable = "avx512f")]
20797#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20798#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
20799#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_mask_permute_ps<const MASK: i32>(
    src: __m512,
    k: __mmask16,
    a: __m512,
) -> __m512 {
    static_assert_uimm_bits!(MASK, 8);
    let r: __m512 = _mm512_permute_ps::<MASK>(a);
    transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16()))
}
20809
20810/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20811///
20812/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permute_ps&expand=4169)
20813#[inline]
20814#[target_feature(enable = "avx512f")]
20815#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20816#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
20817#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_maskz_permute_ps<const MASK: i32>(k: __mmask16, a: __m512) -> __m512 {
    static_assert_uimm_bits!(MASK, 8);
    let r: __m512 = _mm512_permute_ps::<MASK>(a);
    let zero: f32x16 = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, r.as_f32x16(), zero))
}
20824
20825/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20826///
20827/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permute_ps&expand=4165)
20828#[inline]
20829#[target_feature(enable = "avx512f,avx512vl")]
20830#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20831#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
20832#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm256_mask_permute_ps<const MASK: i32>(
    src: __m256,
    k: __mmask8,
    a: __m256,
) -> __m256 {
    let r: __m256 = _mm256_permute_ps::<MASK>(a);
    transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8()))
}
20841
20842/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20843///
20844/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permute_ps&expand=4166)
20845#[inline]
20846#[target_feature(enable = "avx512f,avx512vl")]
20847#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20848#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
20849#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm256_maskz_permute_ps<const MASK: i32>(k: __mmask8, a: __m256) -> __m256 {
    let r: __m256 = _mm256_permute_ps::<MASK>(a);
    let zero: f32x8 = _mm256_setzero_ps().as_f32x8();
    transmute(simd_select_bitmask(k, r.as_f32x8(), zero))
}
20855
20856/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20857///
20858/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permute_ps&expand=4162)
20859#[inline]
20860#[target_feature(enable = "avx512f,avx512vl")]
20861#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20862#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
20863#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm_mask_permute_ps<const MASK: i32>(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    let r: __m128 = _mm_permute_ps::<MASK>(a);
    transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4()))
}
20868
20869/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20870///
20871/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permute_ps&expand=4163)
20872#[inline]
20873#[target_feature(enable = "avx512f,avx512vl")]
20874#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20875#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
20876#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm_maskz_permute_ps<const MASK: i32>(k: __mmask8, a: __m128) -> __m128 {
    let r: __m128 = _mm_permute_ps::<MASK>(a);
    let zero: f32x4 = _mm_setzero_ps().as_f32x4();
    transmute(simd_select_bitmask(k, r.as_f32x4(), zero))
}
20882
20883/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst.
20884///
20885/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permute_pd&expand=4161)
20886#[inline]
20887#[target_feature(enable = "avx512f")]
20888#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20889#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01_10_01))]
20890#[rustc_legacy_const_generics(1)]
pub unsafe fn _mm512_permute_pd<const MASK: i32>(a: __m512d) -> __m512d {
    static_assert_uimm_bits!(MASK, 8);
    simd_shuffle!(
        a,
        a,
        [
            MASK as u32 & 0b1,
            ((MASK as u32 >> 1) & 0b1),
            ((MASK as u32 >> 2) & 0b1) + 2,
            ((MASK as u32 >> 3) & 0b1) + 2,
            ((MASK as u32 >> 4) & 0b1) + 4,
            ((MASK as u32 >> 5) & 0b1) + 4,
            ((MASK as u32 >> 6) & 0b1) + 6,
            ((MASK as u32 >> 7) & 0b1) + 6,
        ],
    )
}
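
// NOTE: a sketch of the `_mm512_permute_pd` control byte (illustrative
// values): one bit per destination element selects the lower (0) or upper (1)
// double of its own 128-bit pair, so 0b0101_0101 swaps every pair:
//
//     let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
//     let r = _mm512_permute_pd::<0b0101_0101>(a);
//     // r = [1, 0, 3, 2, 5, 4, 7, 6]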
20908
20909/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20910///
20911/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permute_pd&expand=4159)
20912#[inline]
20913#[target_feature(enable = "avx512f")]
20914#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20915#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01_10_01))]
20916#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_mask_permute_pd<const MASK: i32>(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
) -> __m512d {
    static_assert_uimm_bits!(MASK, 8);
    let r: __m512d = _mm512_permute_pd::<MASK>(a);
    transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
}
20926
20927/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20928///
20929/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permute_pd&expand=4160)
20930#[inline]
20931#[target_feature(enable = "avx512f")]
20932#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20933#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01_10_01))]
20934#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_maskz_permute_pd<const MASK: i32>(k: __mmask8, a: __m512d) -> __m512d {
    static_assert_uimm_bits!(MASK, 8);
    let r: __m512d = _mm512_permute_pd::<MASK>(a);
    let zero: f64x8 = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, r.as_f64x8(), zero))
}
20941
20942/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20943///
20944/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permute_pd&expand=4156)
20945#[inline]
20946#[target_feature(enable = "avx512f,avx512vl")]
20947#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20948#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01))]
20949#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm256_mask_permute_pd<const MASK: i32>(
    src: __m256d,
    k: __mmask8,
    a: __m256d,
) -> __m256d {
    static_assert_uimm_bits!(MASK, 4);
    let r: __m256d = _mm256_permute_pd::<MASK>(a);
    transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
}
20959
20960/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20961///
20962/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permute_pd&expand=4157)
20963#[inline]
20964#[target_feature(enable = "avx512f,avx512vl")]
20965#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20966#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01))]
20967#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm256_maskz_permute_pd<const MASK: i32>(k: __mmask8, a: __m256d) -> __m256d {
    static_assert_uimm_bits!(MASK, 4);
    let r: __m256d = _mm256_permute_pd::<MASK>(a);
    let zero: f64x4 = _mm256_setzero_pd().as_f64x4();
    transmute(simd_select_bitmask(k, r.as_f64x4(), zero))
}
20974
20975/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20976///
20977/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permute_pd&expand=4153)
20978#[inline]
20979#[target_feature(enable = "avx512f,avx512vl")]
20980#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20981#[cfg_attr(test, assert_instr(vshufpd, IMM2 = 0b01))]
20982#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm_mask_permute_pd<const IMM2: i32>(
    src: __m128d,
    k: __mmask8,
    a: __m128d,
) -> __m128d {
    static_assert_uimm_bits!(IMM2, 2);
    let r: __m128d = _mm_permute_pd::<IMM2>(a);
    transmute(simd_select_bitmask(k, r.as_f64x2(), src.as_f64x2()))
}
20992
20993/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20994///
20995/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permute_pd&expand=4154)
20996#[inline]
20997#[target_feature(enable = "avx512f,avx512vl")]
20998#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20999#[cfg_attr(test, assert_instr(vshufpd, IMM2 = 0b01))]
21000#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm_maskz_permute_pd<const IMM2: i32>(k: __mmask8, a: __m128d) -> __m128d {
    static_assert_uimm_bits!(IMM2, 2);
    let r: __m128d = _mm_permute_pd::<IMM2>(a);
    let zero: f64x2 = _mm_setzero_pd().as_f64x2();
    transmute(simd_select_bitmask(k, r.as_f64x2(), zero))
}
21007
21008/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst.
21009///
21010/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex_epi64&expand=4208)
21011#[inline]
21012#[target_feature(enable = "avx512f")]
21013#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21014#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
21015#[rustc_legacy_const_generics(1)]
pub unsafe fn _mm512_permutex_epi64<const MASK: i32>(a: __m512i) -> __m512i {
    static_assert_uimm_bits!(MASK, 8);
    simd_shuffle!(
        a,
        a,
        [
            MASK as u32 & 0b11,
            (MASK as u32 >> 2) & 0b11,
            ((MASK as u32 >> 4) & 0b11),
            ((MASK as u32 >> 6) & 0b11),
            (MASK as u32 & 0b11) + 4,
            ((MASK as u32 >> 2) & 0b11) + 4,
            ((MASK as u32 >> 4) & 0b11) + 4,
            ((MASK as u32 >> 6) & 0b11) + 4,
        ],
    )
}
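
// NOTE: a sketch for `_mm512_permutex_epi64` (illustrative values): the four
// 2-bit selectors are applied to each 256-bit half independently, so
// MASK = 0b00_01_10_11 reverses the four 64-bit elements of each half:
//
//     let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
//     let r = _mm512_permutex_epi64::<0b00_01_10_11>(a);
//     // r = [3, 2, 1, 0, 7, 6, 5, 4]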
21033
21034/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21035///
21036/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex_epi64&expand=4206)
21037#[inline]
21038#[target_feature(enable = "avx512f")]
21039#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21040#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
21041#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_mask_permutex_epi64<const MASK: i32>(
    src: __m512i,
    k: __mmask8,
    a: __m512i,
) -> __m512i {
    static_assert_uimm_bits!(MASK, 8);
    let r: __m512i = _mm512_permutex_epi64::<MASK>(a);
    transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8()))
}
21051
21052/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21053///
21054/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex_epi64&expand=4207)
21055#[inline]
21056#[target_feature(enable = "avx512f")]
21057#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21058#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
21059#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_maskz_permutex_epi64<const MASK: i32>(k: __mmask8, a: __m512i) -> __m512i {
    static_assert_uimm_bits!(MASK, 8);
    let r: __m512i = _mm512_permutex_epi64::<MASK>(a);
    let zero: i64x8 = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, r.as_i64x8(), zero))
}
21066
21067/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst.
21068///
21069/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex_epi64&expand=4205)
21070#[inline]
21071#[target_feature(enable = "avx512f,avx512vl")]
21072#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21073#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
21074#[rustc_legacy_const_generics(1)]
pub unsafe fn _mm256_permutex_epi64<const MASK: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(MASK, 8);
    simd_shuffle!(
        a,
        a,
        [
            MASK as u32 & 0b11,
            (MASK as u32 >> 2) & 0b11,
            ((MASK as u32 >> 4) & 0b11),
            ((MASK as u32 >> 6) & 0b11),
        ],
    )
}
21088
21089/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21090///
21091/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex_epi6&expand=4203)
21092#[inline]
21093#[target_feature(enable = "avx512f,avx512vl")]
21094#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21095#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
21096#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm256_mask_permutex_epi64<const MASK: i32>(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
) -> __m256i {
    static_assert_uimm_bits!(MASK, 8);
    let r: __m256i = _mm256_permutex_epi64::<MASK>(a);
    transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4()))
}
21106
21107/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21108///
21109/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex_epi64&expand=4204)
21110#[inline]
21111#[target_feature(enable = "avx512f,avx512vl")]
21112#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21113#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
21114#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm256_maskz_permutex_epi64<const MASK: i32>(k: __mmask8, a: __m256i) -> __m256i {
    static_assert_uimm_bits!(MASK, 8);
    let r: __m256i = _mm256_permutex_epi64::<MASK>(a);
    let zero: i64x4 = _mm256_setzero_si256().as_i64x4();
    transmute(simd_select_bitmask(k, r.as_i64x4(), zero))
}
21121
21122/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst.
21123///
21124/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex_pd&expand=4214)
21125#[inline]
21126#[target_feature(enable = "avx512f")]
21127#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21128#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
21129#[rustc_legacy_const_generics(1)]
pub unsafe fn _mm512_permutex_pd<const MASK: i32>(a: __m512d) -> __m512d {
    static_assert_uimm_bits!(MASK, 8);
    simd_shuffle!(
        a,
        a,
        [
            MASK as u32 & 0b11,
            (MASK as u32 >> 2) & 0b11,
            ((MASK as u32 >> 4) & 0b11),
            ((MASK as u32 >> 6) & 0b11),
            (MASK as u32 & 0b11) + 4,
            ((MASK as u32 >> 2) & 0b11) + 4,
            ((MASK as u32 >> 4) & 0b11) + 4,
            ((MASK as u32 >> 6) & 0b11) + 4,
        ],
    )
}
21147
21148/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21149///
21150/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex_pd&expand=4212)
21151#[inline]
21152#[target_feature(enable = "avx512f")]
21153#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21154#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
21155#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_mask_permutex_pd<const MASK: i32>(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
) -> __m512d {
    let r: __m512d = _mm512_permutex_pd::<MASK>(a);
    transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
}
21164
21165/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21166///
21167/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex_pd&expand=4213)
21168#[inline]
21169#[target_feature(enable = "avx512f")]
21170#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21171#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
21172#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_maskz_permutex_pd<const MASK: i32>(k: __mmask8, a: __m512d) -> __m512d {
    let r: __m512d = _mm512_permutex_pd::<MASK>(a);
    let zero: f64x8 = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, r.as_f64x8(), zero))
}
21178
21179/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst.
21180///
21181/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex_pd&expand=4211)
21182#[inline]
21183#[target_feature(enable = "avx512f,avx512vl")]
21184#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21185#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
21186#[rustc_legacy_const_generics(1)]
pub unsafe fn _mm256_permutex_pd<const MASK: i32>(a: __m256d) -> __m256d {
    static_assert_uimm_bits!(MASK, 8);
    simd_shuffle!(
        a,
        a,
        [
            MASK as u32 & 0b11,
            (MASK as u32 >> 2) & 0b11,
            ((MASK as u32 >> 4) & 0b11),
            ((MASK as u32 >> 6) & 0b11),
        ],
    )
}
21200
21201/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21202///
21203/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex_pd&expand=4209)
21204#[inline]
21205#[target_feature(enable = "avx512f,avx512vl")]
21206#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21207#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
21208#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm256_mask_permutex_pd<const MASK: i32>(
    src: __m256d,
    k: __mmask8,
    a: __m256d,
) -> __m256d {
    static_assert_uimm_bits!(MASK, 8);
    let r: __m256d = _mm256_permutex_pd::<MASK>(a);
    transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
}
21218
21219/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21220///
21221/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex_pd&expand=4210)
21222#[inline]
21223#[target_feature(enable = "avx512f,avx512vl")]
21224#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21225#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
21226#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm256_maskz_permutex_pd<const MASK: i32>(k: __mmask8, a: __m256d) -> __m256d {
    static_assert_uimm_bits!(MASK, 8);
    let r: __m256d = _mm256_permutex_pd::<MASK>(a);
    let zero: f64x4 = _mm256_setzero_pd().as_f64x4();
    transmute(simd_select_bitmask(k, r.as_f64x4(), zero))
}
21233
21234/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst. Note that this intrinsic shuffles across 128-bit lanes, unlike past intrinsics that use the permutevar name. This intrinsic is identical to _mm512_permutexvar_epi32, and it is recommended that you use that intrinsic name.
21235///
21236/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutevar_epi32&expand=4182)
21237#[inline]
21238#[target_feature(enable = "avx512f")]
21239#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21240#[cfg_attr(test, assert_instr(vperm))] //should be vpermd
pub unsafe fn _mm512_permutevar_epi32(idx: __m512i, a: __m512i) -> __m512i {
    transmute(vpermd(a.as_i32x16(), idx.as_i32x16()))
}
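
// NOTE: a sketch for `_mm512_permutevar_epi32` (illustrative values); despite
// the `permutevar` name it shuffles across the whole vector, using the low
// four bits of each `idx` element:
//
//     let a = _mm512_setr_epi32(0, 10, 20, 30, 40, 50, 60, 70,
//                               80, 90, 100, 110, 120, 130, 140, 150);
//     let idx = _mm512_set1_epi32(15);
//     let r = _mm512_permutevar_epi32(idx, a); // every lane of r is 150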
21244
21245/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Note that this intrinsic shuffles across 128-bit lanes, unlike past intrinsics that use the permutevar name. This intrinsic is identical to _mm512_mask_permutexvar_epi32, and it is recommended that you use that intrinsic name.
21246///
21247/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutevar_epi32&expand=4181)
21248#[inline]
21249#[target_feature(enable = "avx512f")]
21250#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21251#[cfg_attr(test, assert_instr(vpermd))]
pub unsafe fn _mm512_mask_permutevar_epi32(
    src: __m512i,
    k: __mmask16,
    idx: __m512i,
    a: __m512i,
) -> __m512i {
    let permute: i32x16 = _mm512_permutevar_epi32(idx, a).as_i32x16();
    transmute(simd_select_bitmask(k, permute, src.as_i32x16()))
}
21261
21262/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst.
21263///
21264/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutevar_ps&expand=4200)
21265#[inline]
21266#[target_feature(enable = "avx512f")]
21267#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21268#[cfg_attr(test, assert_instr(vpermilps))]
pub unsafe fn _mm512_permutevar_ps(a: __m512, b: __m512i) -> __m512 {
    transmute(vpermilps(a.as_f32x16(), b.as_i32x16()))
}
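
// NOTE: a sketch for `_mm512_permutevar_ps` (illustrative values): the low
// two bits of each element of `b` pick one of the four floats from the same
// 128-bit lane of `a` (the variable form of `vpermilps`, not a cross-lane
// shuffle):
//
//     let a = _mm512_setr_ps(0., 1., 2., 3., 4., 5., 6., 7.,
//                            8., 9., 10., 11., 12., 13., 14., 15.);
//     let b = _mm512_set1_epi32(3);
//     let r = _mm512_permutevar_ps(a, b);
//     // r = [3, 3, 3, 3, 7, 7, 7, 7, 11, 11, 11, 11, 15, 15, 15, 15]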
21272
21273/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21274///
21275/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutevar_ps&expand=4198)
21276#[inline]
21277#[target_feature(enable = "avx512f")]
21278#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21279#[cfg_attr(test, assert_instr(vpermilps))]
pub unsafe fn _mm512_mask_permutevar_ps(
    src: __m512,
    k: __mmask16,
    a: __m512,
    b: __m512i,
) -> __m512 {
    let permute: f32x16 = _mm512_permutevar_ps(a, b).as_f32x16();
    transmute(simd_select_bitmask(k, permute, src.as_f32x16()))
}
21289
21290/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21291///
21292/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutevar_ps&expand=4199)
21293#[inline]
21294#[target_feature(enable = "avx512f")]
21295#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21296#[cfg_attr(test, assert_instr(vpermilps))]
pub unsafe fn _mm512_maskz_permutevar_ps(k: __mmask16, a: __m512, b: __m512i) -> __m512 {
    let permute: f32x16 = _mm512_permutevar_ps(a, b).as_f32x16();
    let zero: f32x16 = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, permute, zero))
}
21302
21303/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21304///
21305/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm256_mask_permutevar_ps&expand=4195)
21306#[inline]
21307#[target_feature(enable = "avx512f,avx512vl")]
21308#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21309#[cfg_attr(test, assert_instr(vpermilps))]
pub unsafe fn _mm256_mask_permutevar_ps(src: __m256, k: __mmask8, a: __m256, b: __m256i) -> __m256 {
    let permute: f32x8 = _mm256_permutevar_ps(a, b).as_f32x8();
    transmute(simd_select_bitmask(k, permute, src.as_f32x8()))
}
21314
21315/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21316///
21317/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutevar_ps&expand=4196)
21318#[inline]
21319#[target_feature(enable = "avx512f,avx512vl")]
21320#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21321#[cfg_attr(test, assert_instr(vpermilps))]
pub unsafe fn _mm256_maskz_permutevar_ps(k: __mmask8, a: __m256, b: __m256i) -> __m256 {
    let permute: f32x8 = _mm256_permutevar_ps(a, b).as_f32x8();
    let zero: f32x8 = _mm256_setzero_ps().as_f32x8();
    transmute(simd_select_bitmask(k, permute, zero))
}
21327
21328/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21329///
21330/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutevar_ps&expand=4192)
21331#[inline]
21332#[target_feature(enable = "avx512f,avx512vl")]
21333#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21334#[cfg_attr(test, assert_instr(vpermilps))]
pub unsafe fn _mm_mask_permutevar_ps(src: __m128, k: __mmask8, a: __m128, b: __m128i) -> __m128 {
    let permute: f32x4 = _mm_permutevar_ps(a, b).as_f32x4();
    transmute(simd_select_bitmask(k, permute, src.as_f32x4()))
}
21339
21340/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21341///
21342/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutevar_ps&expand=4193)
21343#[inline]
21344#[target_feature(enable = "avx512f,avx512vl")]
21345#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21346#[cfg_attr(test, assert_instr(vpermilps))]
pub unsafe fn _mm_maskz_permutevar_ps(k: __mmask8, a: __m128, b: __m128i) -> __m128 {
    let permute: f32x4 = _mm_permutevar_ps(a, b).as_f32x4();
    let zero: f32x4 = _mm_setzero_ps().as_f32x4();
    transmute(simd_select_bitmask(k, permute, zero))
}
21352
21353/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst.
21354///
21355/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutevar_pd&expand=4191)
21356#[inline]
21357#[target_feature(enable = "avx512f")]
21358#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21359#[cfg_attr(test, assert_instr(vpermilpd))]
pub unsafe fn _mm512_permutevar_pd(a: __m512d, b: __m512i) -> __m512d {
    transmute(vpermilpd(a.as_f64x8(), b.as_i64x8()))
}
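
// NOTE: a sketch for `_mm512_permutevar_pd` (illustrative values): it is
// bit 1 (not bit 0) of each 64-bit element of `b` that selects the lower or
// upper double of the corresponding 128-bit lane of `a`:
//
//     let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
//     let b = _mm512_set1_epi64(0b10);
//     let r = _mm512_permutevar_pd(a, b); // r = [1, 1, 3, 3, 5, 5, 7, 7]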
21363
21364/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21365///
21366/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutevar_pd&expand=4189)
21367#[inline]
21368#[target_feature(enable = "avx512f")]
21369#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21370#[cfg_attr(test, assert_instr(vpermilpd))]
pub unsafe fn _mm512_mask_permutevar_pd(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
    b: __m512i,
) -> __m512d {
    let permute: f64x8 = _mm512_permutevar_pd(a, b).as_f64x8();
    transmute(simd_select_bitmask(k, permute, src.as_f64x8()))
}
21380
21381/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21382///
21383/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutevar_pd&expand=4190)
21384#[inline]
21385#[target_feature(enable = "avx512f")]
21386#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21387#[cfg_attr(test, assert_instr(vpermilpd))]
pub unsafe fn _mm512_maskz_permutevar_pd(k: __mmask8, a: __m512d, b: __m512i) -> __m512d {
    let permute: f64x8 = _mm512_permutevar_pd(a, b).as_f64x8();
    let zero: f64x8 = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, permute, zero))
}
21393
21394/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21395///
21396/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutevar_pd&expand=4186)
21397#[inline]
21398#[target_feature(enable = "avx512f,avx512vl")]
21399#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21400#[cfg_attr(test, assert_instr(vpermilpd))]
pub unsafe fn _mm256_mask_permutevar_pd(
    src: __m256d,
    k: __mmask8,
    a: __m256d,
    b: __m256i,
) -> __m256d {
    let permute: f64x4 = _mm256_permutevar_pd(a, b).as_f64x4();
    transmute(simd_select_bitmask(k, permute, src.as_f64x4()))
}
21410
21411/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21412///
21413/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutevar_pd&expand=4187)
21414#[inline]
21415#[target_feature(enable = "avx512f,avx512vl")]
21416#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21417#[cfg_attr(test, assert_instr(vpermilpd))]
pub unsafe fn _mm256_maskz_permutevar_pd(k: __mmask8, a: __m256d, b: __m256i) -> __m256d {
    let permute: f64x4 = _mm256_permutevar_pd(a, b).as_f64x4();
    let zero: f64x4 = _mm256_setzero_pd().as_f64x4();
    transmute(simd_select_bitmask(k, permute, zero))
}
21423
21424/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21425///
21426/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutevar_pd&expand=4183)
21427#[inline]
21428#[target_feature(enable = "avx512f,avx512vl")]
21429#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21430#[cfg_attr(test, assert_instr(vpermilpd))]
pub unsafe fn _mm_mask_permutevar_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128i) -> __m128d {
    let permute: f64x2 = _mm_permutevar_pd(a, b).as_f64x2();
    transmute(simd_select_bitmask(k, permute, src.as_f64x2()))
}
21435
21436/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21437///
21438/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutevar_pd&expand=4184)
21439#[inline]
21440#[target_feature(enable = "avx512f,avx512vl")]
21441#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21442#[cfg_attr(test, assert_instr(vpermilpd))]
pub unsafe fn _mm_maskz_permutevar_pd(k: __mmask8, a: __m128d, b: __m128i) -> __m128d {
    let permute: f64x2 = _mm_permutevar_pd(a, b).as_f64x2();
    let zero: f64x2 = _mm_setzero_pd().as_f64x2();
    transmute(simd_select_bitmask(k, permute, zero))
}
21448
21449/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
21450///
21451/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_epi32&expand=4301)
21452#[inline]
21453#[target_feature(enable = "avx512f")]
21454#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21455#[cfg_attr(test, assert_instr(vperm))] //should be vpermd
pub unsafe fn _mm512_permutexvar_epi32(idx: __m512i, a: __m512i) -> __m512i {
    transmute(vpermd(a.as_i32x16(), idx.as_i32x16()))
}
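
// NOTE: a sketch for `_mm512_permutexvar_epi32` (illustrative values): `idx`
// may pull from any of the 16 source lanes, e.g. reversing the whole vector:
//
//     let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7,
//                               8, 9, 10, 11, 12, 13, 14, 15);
//     let idx = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8,
//                                 7, 6, 5, 4, 3, 2, 1, 0);
//     let r = _mm512_permutexvar_epi32(idx, a); // r = [15, 14, ..., 1, 0]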
21459
21460/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21461///
21462/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_epi32&expand=4299)
21463#[inline]
21464#[target_feature(enable = "avx512f")]
21465#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21466#[cfg_attr(test, assert_instr(vpermd))]
pub unsafe fn _mm512_mask_permutexvar_epi32(
    src: __m512i,
    k: __mmask16,
    idx: __m512i,
    a: __m512i,
) -> __m512i {
    let permute: i32x16 = _mm512_permutexvar_epi32(idx, a).as_i32x16();
    transmute(simd_select_bitmask(k, permute, src.as_i32x16()))
}
21476
21477/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21478///
21479/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_epi32&expand=4300)
21480#[inline]
21481#[target_feature(enable = "avx512f")]
21482#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21483#[cfg_attr(test, assert_instr(vpermd))]
pub unsafe fn _mm512_maskz_permutexvar_epi32(k: __mmask16, idx: __m512i, a: __m512i) -> __m512i {
    let permute: i32x16 = _mm512_permutexvar_epi32(idx, a).as_i32x16();
    let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, permute, zero))
}
21489
21490/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
21491///
21492/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_epi32&expand=4298)
21493#[inline]
21494#[target_feature(enable = "avx512f,avx512vl")]
21495#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21496#[cfg_attr(test, assert_instr(vperm))] //should be vpermd
pub unsafe fn _mm256_permutexvar_epi32(idx: __m256i, a: __m256i) -> __m256i {
    _mm256_permutevar8x32_epi32(a, idx) // llvm uses llvm.x86.avx2.permd
}
21500
21501/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21502///
21503/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_epi32&expand=4296)
21504#[inline]
21505#[target_feature(enable = "avx512f,avx512vl")]
21506#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21507#[cfg_attr(test, assert_instr(vpermd))]
pub unsafe fn _mm256_mask_permutexvar_epi32(
    src: __m256i,
    k: __mmask8,
    idx: __m256i,
    a: __m256i,
) -> __m256i {
    let permute: i32x8 = _mm256_permutexvar_epi32(idx, a).as_i32x8();
    transmute(simd_select_bitmask(k, permute, src.as_i32x8()))
}
21517
21518/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21519///
21520/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_epi32&expand=4297)
21521#[inline]
21522#[target_feature(enable = "avx512f,avx512vl")]
21523#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21524#[cfg_attr(test, assert_instr(vpermd))]
pub unsafe fn _mm256_maskz_permutexvar_epi32(k: __mmask8, idx: __m256i, a: __m256i) -> __m256i {
    let permute: i32x8 = _mm256_permutexvar_epi32(idx, a).as_i32x8();
    let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
    transmute(simd_select_bitmask(k, permute, zero))
}
21530
21531/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
21532///
21533/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_epi64&expand=4307)
21534#[inline]
21535#[target_feature(enable = "avx512f")]
21536#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21537#[cfg_attr(test, assert_instr(vperm))] //should be vpermq
pub unsafe fn _mm512_permutexvar_epi64(idx: __m512i, a: __m512i) -> __m512i {
    transmute(vpermq(a.as_i64x8(), idx.as_i64x8()))
}
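
// NOTE: a sketch for `_mm512_permutexvar_epi64` (illustrative values): only
// the low three bits of each `idx` element are used, so an index of 9 selects
// the same lane as 1:
//
//     let a = _mm512_setr_epi64(0, 10, 20, 30, 40, 50, 60, 70);
//     let idx = _mm512_set1_epi64(9);
//     let r = _mm512_permutexvar_epi64(idx, a); // every lane of r is 10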
21541
21542/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21543///
21544/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_epi64&expand=4305)
21545#[inline]
21546#[target_feature(enable = "avx512f")]
21547#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21548#[cfg_attr(test, assert_instr(vpermq))]
pub unsafe fn _mm512_mask_permutexvar_epi64(
    src: __m512i,
    k: __mmask8,
    idx: __m512i,
    a: __m512i,
) -> __m512i {
    let permute: i64x8 = _mm512_permutexvar_epi64(idx, a).as_i64x8();
    transmute(simd_select_bitmask(k, permute, src.as_i64x8()))
}
21558
21559/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21560///
21561/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_epi64&expand=4306)
21562#[inline]
21563#[target_feature(enable = "avx512f")]
21564#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21565#[cfg_attr(test, assert_instr(vpermq))]
pub unsafe fn _mm512_maskz_permutexvar_epi64(k: __mmask8, idx: __m512i, a: __m512i) -> __m512i {
    let permute: i64x8 = _mm512_permutexvar_epi64(idx, a).as_i64x8();
    let zero: i64x8 = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, permute, zero))
}
21571
21572/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
21573///
21574/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_epi64&expand=4304)
21575#[inline]
21576#[target_feature(enable = "avx512f,avx512vl")]
21577#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21578#[cfg_attr(test, assert_instr(vperm))] //should be vpermq
pub unsafe fn _mm256_permutexvar_epi64(idx: __m256i, a: __m256i) -> __m256i {
    transmute(vpermq256(a.as_i64x4(), idx.as_i64x4()))
}
21582
21583/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21584///
21585/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_epi64&expand=4302)
21586#[inline]
21587#[target_feature(enable = "avx512f,avx512vl")]
21588#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21589#[cfg_attr(test, assert_instr(vpermq))]
pub unsafe fn _mm256_mask_permutexvar_epi64(
    src: __m256i,
    k: __mmask8,
    idx: __m256i,
    a: __m256i,
) -> __m256i {
    let permute: i64x4 = _mm256_permutexvar_epi64(idx, a).as_i64x4();
    transmute(simd_select_bitmask(k, permute, src.as_i64x4()))
}
21599
21600/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21601///
21602/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_epi64&expand=4303)
21603#[inline]
21604#[target_feature(enable = "avx512f,avx512vl")]
21605#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21606#[cfg_attr(test, assert_instr(vpermq))]
pub unsafe fn _mm256_maskz_permutexvar_epi64(k: __mmask8, idx: __m256i, a: __m256i) -> __m256i {
    let permute: i64x4 = _mm256_permutexvar_epi64(idx, a).as_i64x4();
    let zero: i64x4 = _mm256_setzero_si256().as_i64x4();
    transmute(simd_select_bitmask(k, permute, zero))
}
21612
21613/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx.
21614///
21615/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutevar_ps&expand=4200)
21616#[inline]
21617#[target_feature(enable = "avx512f")]
21618#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21619#[cfg_attr(test, assert_instr(vpermps))]
pub unsafe fn _mm512_permutexvar_ps(idx: __m512i, a: __m512) -> __m512 {
    transmute(vpermps(a.as_f32x16(), idx.as_i32x16()))
}
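
// NOTE: a sketch for `_mm512_permutexvar_ps` (illustrative values): a
// constant index broadcasts a single source lane across the whole vector:
//
//     let a = _mm512_setr_ps(0., 1., 2., 3., 4., 5., 6., 7.,
//                            8., 9., 10., 11., 12., 13., 14., 15.);
//     let idx = _mm512_set1_epi32(5);
//     let r = _mm512_permutexvar_ps(idx, a); // every lane of r is 5.0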
21623
21624/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21625///
21626/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_ps&expand=4326)
21627#[inline]
21628#[target_feature(enable = "avx512f")]
21629#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21630#[cfg_attr(test, assert_instr(vpermps))]
21631pub unsafe fn _mm512_mask_permutexvar_ps(
21632 src: __m512,
21633 k: __mmask16,
21634 idx: __m512i,
21635 a: __m512,
21636) -> __m512 {
21637 let permute: f32x16 = _mm512_permutexvar_ps(idx, a).as_f32x16();
21638 transmute(simd_select_bitmask(k, permute, src.as_f32x16()))
21639}
21640
21641/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21642///
21643/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_ps&expand=4327)
21644#[inline]
21645#[target_feature(enable = "avx512f")]
21646#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21647#[cfg_attr(test, assert_instr(vpermps))]
21648pub unsafe fn _mm512_maskz_permutexvar_ps(k: __mmask16, idx: __m512i, a: __m512) -> __m512 {
21649 let permute: f32x16 = _mm512_permutexvar_ps(idx, a).as_f32x16();
21650 let zero: f32x16 = _mm512_setzero_ps().as_f32x16();
21651 transmute(simd_select_bitmask(k, permute, zero))
21652}
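
// Illustrative sketch (hypothetical helper, not part of the crate's API): with
// an all-zero index, `_mm512_maskz_permutexvar_ps` broadcasts element 0 of `a`
// to every lane whose mask bit is set and zeroes the remaining lanes.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_masked_broadcast_lane0_ps(k: __mmask16, a: __m512) -> __m512 {
    let idx = _mm512_setzero_si512(); // every index selects element 0
    _mm512_maskz_permutexvar_ps(k, idx, a)
}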
21653
21654/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx.
21655///
21656/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_ps&expand=4325)
21657#[inline]
21658#[target_feature(enable = "avx512f,avx512vl")]
21659#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21660#[cfg_attr(test, assert_instr(vpermps))]
21661pub unsafe fn _mm256_permutexvar_ps(idx: __m256i, a: __m256) -> __m256 {
21662 _mm256_permutevar8x32_ps(a, idx) //llvm.x86.avx2.permps
21663}
21664
21665/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21666///
21667/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_ps&expand=4323)
21668#[inline]
21669#[target_feature(enable = "avx512f,avx512vl")]
21670#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21671#[cfg_attr(test, assert_instr(vpermps))]
21672pub unsafe fn _mm256_mask_permutexvar_ps(
21673 src: __m256,
21674 k: __mmask8,
21675 idx: __m256i,
21676 a: __m256,
21677) -> __m256 {
21678 let permute: f32x8 = _mm256_permutexvar_ps(idx, a).as_f32x8();
21679 transmute(simd_select_bitmask(k, permute, src.as_f32x8()))
21680}
21681
21682/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21683///
21684/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_ps&expand=4324)
21685#[inline]
21686#[target_feature(enable = "avx512f,avx512vl")]
21687#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21688#[cfg_attr(test, assert_instr(vpermps))]
21689pub unsafe fn _mm256_maskz_permutexvar_ps(k: __mmask8, idx: __m256i, a: __m256) -> __m256 {
21690 let permute: f32x8 = _mm256_permutexvar_ps(idx, a).as_f32x8();
21691 let zero: f32x8 = _mm256_setzero_ps().as_f32x8();
21692 transmute(simd_select_bitmask(k, permute, zero))
21693}
21694
21695/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst.
21696///
21697/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_pd&expand=4322)
21698#[inline]
21699#[target_feature(enable = "avx512f")]
21700#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21701#[cfg_attr(test, assert_instr(vpermpd))]
21702pub unsafe fn _mm512_permutexvar_pd(idx: __m512i, a: __m512d) -> __m512d {
21703 transmute(vpermpd(a.as_f64x8(), idx.as_i64x8()))
21704}
21705
21706/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21707///
21708/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_pd&expand=4320)
21709#[inline]
21710#[target_feature(enable = "avx512f")]
21711#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21712#[cfg_attr(test, assert_instr(vpermpd))]
21713pub unsafe fn _mm512_mask_permutexvar_pd(
21714 src: __m512d,
21715 k: __mmask8,
21716 idx: __m512i,
21717 a: __m512d,
21718) -> __m512d {
21719 let permute: f64x8 = _mm512_permutexvar_pd(idx, a).as_f64x8();
21720 transmute(simd_select_bitmask(k, permute, src.as_f64x8()))
21721}
21722
21723/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21724///
21725/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_pd&expand=4321)
21726#[inline]
21727#[target_feature(enable = "avx512f")]
21728#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21729#[cfg_attr(test, assert_instr(vpermpd))]
21730pub unsafe fn _mm512_maskz_permutexvar_pd(k: __mmask8, idx: __m512i, a: __m512d) -> __m512d {
21731 let permute: f64x8 = _mm512_permutexvar_pd(idx, a).as_f64x8();
21732 let zero: f64x8 = _mm512_setzero_pd().as_f64x8();
21733 transmute(simd_select_bitmask(k, permute, zero))
21734}
21735
21736/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst.
21737///
21738/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_pd&expand=4319)
21739#[inline]
21740#[target_feature(enable = "avx512f,avx512vl")]
21741#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21742#[cfg_attr(test, assert_instr(vpermpd))]
21743pub unsafe fn _mm256_permutexvar_pd(idx: __m256i, a: __m256d) -> __m256d {
21744 transmute(vpermpd256(a.as_f64x4(), idx.as_i64x4()))
21745}
21746
21747/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21748///
21749/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_pd&expand=4317)
21750#[inline]
21751#[target_feature(enable = "avx512f,avx512vl")]
21752#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21753#[cfg_attr(test, assert_instr(vpermpd))]
21754pub unsafe fn _mm256_mask_permutexvar_pd(
21755 src: __m256d,
21756 k: __mmask8,
21757 idx: __m256i,
21758 a: __m256d,
21759) -> __m256d {
21760 let permute: f64x4 = _mm256_permutexvar_pd(idx, a).as_f64x4();
21761 transmute(simd_select_bitmask(k, permute, src.as_f64x4()))
21762}
21763
21764/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21765///
21766/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_pd&expand=4318)
21767#[inline]
21768#[target_feature(enable = "avx512f,avx512vl")]
21769#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21770#[cfg_attr(test, assert_instr(vpermpd))]
21771pub unsafe fn _mm256_maskz_permutexvar_pd(k: __mmask8, idx: __m256i, a: __m256d) -> __m256d {
21772 let permute: f64x4 = _mm256_permutexvar_pd(idx, a).as_f64x4();
21773 let zero: f64x4 = _mm256_setzero_pd().as_f64x4();
21774 transmute(simd_select_bitmask(k, permute, zero))
21775}
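
// Illustrative sketch (hypothetical helper, not part of the crate's API):
// rotating the four double-precision lanes of `a` by one position with the
// writemask form, so lanes whose mask bit is clear keep the value from `src`.
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn example_masked_rotate_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
    // idx = [1, 2, 3, 0]: output lane i takes input lane (i + 1) % 4.
    let idx = _mm256_set_epi64x(0, 3, 2, 1);
    _mm256_mask_permutexvar_pd(src, k, idx, a)
}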
21776
21777/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
21778///
21779/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_epi32&expand=4238)
21780#[inline]
21781#[target_feature(enable = "avx512f")]
21782#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21783#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
21784pub unsafe fn _mm512_permutex2var_epi32(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
21785 transmute(vpermi2d(a.as_i32x16(), idx.as_i32x16(), b.as_i32x16()))
21786}
21787
21788/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
21789///
21790/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_epi32&expand=4235)
21791#[inline]
21792#[target_feature(enable = "avx512f")]
21793#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21794#[cfg_attr(test, assert_instr(vpermt2d))]
21795pub unsafe fn _mm512_mask_permutex2var_epi32(
21796 a: __m512i,
21797 k: __mmask16,
21798 idx: __m512i,
21799 b: __m512i,
21800) -> __m512i {
21801 let permute: i32x16 = _mm512_permutex2var_epi32(a, idx, b).as_i32x16();
21802 transmute(simd_select_bitmask(k, permute, a.as_i32x16()))
21803}
21804
21805/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21806///
21807/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_epi32&expand=4237)
21808#[inline]
21809#[target_feature(enable = "avx512f")]
21810#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21811#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
21812pub unsafe fn _mm512_maskz_permutex2var_epi32(
21813 k: __mmask16,
21814 a: __m512i,
21815 idx: __m512i,
21816 b: __m512i,
21817) -> __m512i {
21818 let permute: i32x16 = _mm512_permutex2var_epi32(a, idx, b).as_i32x16();
21819 let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
21820 transmute(simd_select_bitmask(k, permute, zero))
21821}
21822
21823/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
21824///
21825/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_epi32&expand=4236)
21826#[inline]
21827#[target_feature(enable = "avx512f")]
21828#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21829#[cfg_attr(test, assert_instr(vpermi2d))]
21830pub unsafe fn _mm512_mask2_permutex2var_epi32(
21831 a: __m512i,
21832 idx: __m512i,
21833 k: __mmask16,
21834 b: __m512i,
21835) -> __m512i {
21836 let permute: i32x16 = _mm512_permutex2var_epi32(a, idx, b).as_i32x16();
21837 transmute(simd_select_bitmask(k, permute, idx.as_i32x16()))
21838}
21839
21840/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
21841///
21842/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_epi32&expand=4234)
21843#[inline]
21844#[target_feature(enable = "avx512f,avx512vl")]
21845#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21846#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
21847pub unsafe fn _mm256_permutex2var_epi32(a: __m256i, idx: __m256i, b: __m256i) -> __m256i {
21848 transmute(vpermi2d256(a.as_i32x8(), idx.as_i32x8(), b.as_i32x8()))
21849}
21850
21851/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
21852///
21853/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_epi32&expand=4231)
21854#[inline]
21855#[target_feature(enable = "avx512f,avx512vl")]
21856#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21857#[cfg_attr(test, assert_instr(vpermt2d))]
21858pub unsafe fn _mm256_mask_permutex2var_epi32(
21859 a: __m256i,
21860 k: __mmask8,
21861 idx: __m256i,
21862 b: __m256i,
21863) -> __m256i {
21864 let permute: i32x8 = _mm256_permutex2var_epi32(a, idx, b).as_i32x8();
21865 transmute(simd_select_bitmask(k, permute, a.as_i32x8()))
21866}
21867
21868/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21869///
21870/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_epi32&expand=4233)
21871#[inline]
21872#[target_feature(enable = "avx512f,avx512vl")]
21873#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21874#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
21875pub unsafe fn _mm256_maskz_permutex2var_epi32(
21876 k: __mmask8,
21877 a: __m256i,
21878 idx: __m256i,
21879 b: __m256i,
21880) -> __m256i {
21881 let permute: i32x8 = _mm256_permutex2var_epi32(a, idx, b).as_i32x8();
21882 let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
21883 transmute(simd_select_bitmask(k, permute, zero))
21884}
21885
21886/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
21887///
21888/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_epi32&expand=4232)
21889#[inline]
21890#[target_feature(enable = "avx512f,avx512vl")]
21891#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21892#[cfg_attr(test, assert_instr(vpermi2d))]
21893pub unsafe fn _mm256_mask2_permutex2var_epi32(
21894 a: __m256i,
21895 idx: __m256i,
21896 k: __mmask8,
21897 b: __m256i,
21898) -> __m256i {
21899 let permute: i32x8 = _mm256_permutex2var_epi32(a, idx, b).as_i32x8();
21900 transmute(simd_select_bitmask(k, permute, idx.as_i32x8()))
21901}
21902
21903/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
21904///
21905/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_epi32&expand=4230)
21906#[inline]
21907#[target_feature(enable = "avx512f,avx512vl")]
21908#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21909#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
21910pub unsafe fn _mm_permutex2var_epi32(a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
21911 transmute(vpermi2d128(a.as_i32x4(), idx.as_i32x4(), b.as_i32x4()))
21912}
21913
21914/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
21915///
21916/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_epi32&expand=4227)
21917#[inline]
21918#[target_feature(enable = "avx512f,avx512vl")]
21919#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21920#[cfg_attr(test, assert_instr(vpermt2d))]
21921pub unsafe fn _mm_mask_permutex2var_epi32(
21922 a: __m128i,
21923 k: __mmask8,
21924 idx: __m128i,
21925 b: __m128i,
21926) -> __m128i {
21927 let permute: i32x4 = _mm_permutex2var_epi32(a, idx, b).as_i32x4();
21928 transmute(simd_select_bitmask(k, permute, a.as_i32x4()))
21929}
21930
21931/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21932///
21933/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_epi32&expand=4229)
21934#[inline]
21935#[target_feature(enable = "avx512f,avx512vl")]
21936#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21937#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
21938pub unsafe fn _mm_maskz_permutex2var_epi32(
21939 k: __mmask8,
21940 a: __m128i,
21941 idx: __m128i,
21942 b: __m128i,
21943) -> __m128i {
21944 let permute: i32x4 = _mm_permutex2var_epi32(a, idx, b).as_i32x4();
21945 let zero: i32x4 = _mm_setzero_si128().as_i32x4();
21946 transmute(simd_select_bitmask(k, permute, zero))
21947}
21948
21949/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
21950///
21951/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_epi32&expand=4228)
21952#[inline]
21953#[target_feature(enable = "avx512f,avx512vl")]
21954#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21955#[cfg_attr(test, assert_instr(vpermi2d))]
21956pub unsafe fn _mm_mask2_permutex2var_epi32(
21957 a: __m128i,
21958 idx: __m128i,
21959 k: __mmask8,
21960 b: __m128i,
21961) -> __m128i {
21962 let permute: i32x4 = _mm_permutex2var_epi32(a, idx, b).as_i32x4();
21963 transmute(simd_select_bitmask(k, permute, idx.as_i32x4()))
21964}
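
// Illustrative sketch (hypothetical helper, not part of the crate's API): for
// `_mm_permutex2var_epi32`, index values 0..=3 select elements of `a` and
// 4..=7 select elements of `b`, so the index below interleaves the low halves.
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn example_interleave_lo_epi32(a: __m128i, b: __m128i) -> __m128i {
    // Result: [a[0], b[0], a[1], b[1]].
    let idx = _mm_setr_epi32(0, 4, 1, 5);
    _mm_permutex2var_epi32(a, idx, b)
}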
21965
21966/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
21967///
21968/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_epi64&expand=4250)
21969#[inline]
21970#[target_feature(enable = "avx512f")]
21971#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21972#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
21973pub unsafe fn _mm512_permutex2var_epi64(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
21974 transmute(vpermi2q(a.as_i64x8(), idx.as_i64x8(), b.as_i64x8()))
21975}
21976
21977/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
21978///
21979/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_epi64&expand=4247)
21980#[inline]
21981#[target_feature(enable = "avx512f")]
21982#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21983#[cfg_attr(test, assert_instr(vpermt2q))]
21984pub unsafe fn _mm512_mask_permutex2var_epi64(
21985 a: __m512i,
21986 k: __mmask8,
21987 idx: __m512i,
21988 b: __m512i,
21989) -> __m512i {
21990 let permute: i64x8 = _mm512_permutex2var_epi64(a, idx, b).as_i64x8();
21991 transmute(simd_select_bitmask(k, permute, a.as_i64x8()))
21992}
21993
21994/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21995///
21996/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_epi64&expand=4249)
21997#[inline]
21998#[target_feature(enable = "avx512f")]
21999#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22000#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
22001pub unsafe fn _mm512_maskz_permutex2var_epi64(
22002 k: __mmask8,
22003 a: __m512i,
22004 idx: __m512i,
22005 b: __m512i,
22006) -> __m512i {
22007 let permute: i64x8 = _mm512_permutex2var_epi64(a, idx, b).as_i64x8();
22008 let zero: i64x8 = _mm512_setzero_si512().as_i64x8();
22009 transmute(simd_select_bitmask(k, permute, zero))
22010}
22011
22012/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
22013///
22014/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_epi64&expand=4248)
22015#[inline]
22016#[target_feature(enable = "avx512f")]
22017#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22018#[cfg_attr(test, assert_instr(vpermi2q))]
22019pub unsafe fn _mm512_mask2_permutex2var_epi64(
22020 a: __m512i,
22021 idx: __m512i,
22022 k: __mmask8,
22023 b: __m512i,
22024) -> __m512i {
22025 let permute: i64x8 = _mm512_permutex2var_epi64(a, idx, b).as_i64x8();
22026 transmute(simd_select_bitmask(k, permute, idx.as_i64x8()))
22027}
22028
22029/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
22030///
22031/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_epi64&expand=4246)
22032#[inline]
22033#[target_feature(enable = "avx512f,avx512vl")]
22034#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22035#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
22036pub unsafe fn _mm256_permutex2var_epi64(a: __m256i, idx: __m256i, b: __m256i) -> __m256i {
22037 transmute(vpermi2q256(a.as_i64x4(), idx.as_i64x4(), b.as_i64x4()))
22038}
22039
22040/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
22041///
22042/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_epi64&expand=4243)
22043#[inline]
22044#[target_feature(enable = "avx512f,avx512vl")]
22045#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22046#[cfg_attr(test, assert_instr(vpermt2q))]
22047pub unsafe fn _mm256_mask_permutex2var_epi64(
22048 a: __m256i,
22049 k: __mmask8,
22050 idx: __m256i,
22051 b: __m256i,
22052) -> __m256i {
22053 let permute: i64x4 = _mm256_permutex2var_epi64(a, idx, b).as_i64x4();
22054 transmute(simd_select_bitmask(k, permute, a.as_i64x4()))
22055}
22056
22057/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22058///
22059/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_epi64&expand=4245)
22060#[inline]
22061#[target_feature(enable = "avx512f,avx512vl")]
22062#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22063#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
22064pub unsafe fn _mm256_maskz_permutex2var_epi64(
22065 k: __mmask8,
22066 a: __m256i,
22067 idx: __m256i,
22068 b: __m256i,
22069) -> __m256i {
22070 let permute: i64x4 = _mm256_permutex2var_epi64(a, idx, b).as_i64x4();
22071 let zero: i64x4 = _mm256_setzero_si256().as_i64x4();
22072 transmute(simd_select_bitmask(k, permute, zero))
22073}
22074
22075/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
22076///
22077/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_epi64&expand=4244)
22078#[inline]
22079#[target_feature(enable = "avx512f,avx512vl")]
22080#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22081#[cfg_attr(test, assert_instr(vpermi2q))]
22082pub unsafe fn _mm256_mask2_permutex2var_epi64(
22083 a: __m256i,
22084 idx: __m256i,
22085 k: __mmask8,
22086 b: __m256i,
22087) -> __m256i {
22088 let permute: i64x4 = _mm256_permutex2var_epi64(a, idx, b).as_i64x4();
22089 transmute(simd_select_bitmask(k, permute, idx.as_i64x4()))
22090}
22091
22092/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
22093///
22094/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_epi64&expand=4242)
22095#[inline]
22096#[target_feature(enable = "avx512f,avx512vl")]
22097#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22098#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
22099pub unsafe fn _mm_permutex2var_epi64(a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
22100 transmute(vpermi2q128(a.as_i64x2(), idx.as_i64x2(), b.as_i64x2()))
22101}
22102
22103/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
22104///
22105/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_epi64&expand=4239)
22106#[inline]
22107#[target_feature(enable = "avx512f,avx512vl")]
22108#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22109#[cfg_attr(test, assert_instr(vpermt2q))]
22110pub unsafe fn _mm_mask_permutex2var_epi64(
22111 a: __m128i,
22112 k: __mmask8,
22113 idx: __m128i,
22114 b: __m128i,
22115) -> __m128i {
22116 let permute: i64x2 = _mm_permutex2var_epi64(a, idx, b).as_i64x2();
22117 transmute(simd_select_bitmask(k, permute, a.as_i64x2()))
22118}
22119
22120/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22121///
22122/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_epi64&expand=4241)
22123#[inline]
22124#[target_feature(enable = "avx512f,avx512vl")]
22125#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22126#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
22127pub unsafe fn _mm_maskz_permutex2var_epi64(
22128 k: __mmask8,
22129 a: __m128i,
22130 idx: __m128i,
22131 b: __m128i,
22132) -> __m128i {
22133 let permute: i64x2 = _mm_permutex2var_epi64(a, idx, b).as_i64x2();
22134 let zero: i64x2 = _mm_setzero_si128().as_i64x2();
22135 transmute(simd_select_bitmask(k, permute, zero))
22136}
22137
22138/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
22139///
22140/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_epi64&expand=4240)
22141#[inline]
22142#[target_feature(enable = "avx512f,avx512vl")]
22143#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22144#[cfg_attr(test, assert_instr(vpermi2q))]
22145pub unsafe fn _mm_mask2_permutex2var_epi64(
22146 a: __m128i,
22147 idx: __m128i,
22148 k: __mmask8,
22149 b: __m128i,
22150) -> __m128i {
22151 let permute: i64x2 = _mm_permutex2var_epi64(a, idx, b).as_i64x2();
22152 transmute(simd_select_bitmask(k, permute, idx.as_i64x2()))
22153}
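
// Illustrative sketch (hypothetical helper, not part of the crate's API): in
// the 128-bit form of `_mm_permutex2var_epi64`, bit 0 of each index selects the
// element and bit 1 selects the source (0 = `a`, 1 = `b`).
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn example_pick_across_epi64(a: __m128i, b: __m128i) -> __m128i {
    // idx = [1, 2]: result is [a[1], b[0]].
    let idx = _mm_set_epi64x(2, 1);
    _mm_permutex2var_epi64(a, idx, b)
}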
22154
22155/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
22156///
22157/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_ps&expand=4286)
22158#[inline]
22159#[target_feature(enable = "avx512f")]
22160#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22161#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
22162pub unsafe fn _mm512_permutex2var_ps(a: __m512, idx: __m512i, b: __m512) -> __m512 {
22163 transmute(vpermi2ps(a.as_f32x16(), idx.as_i32x16(), b.as_f32x16()))
22164}
22165
22166/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
22167///
22168/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_ps&expand=4283)
22169#[inline]
22170#[target_feature(enable = "avx512f")]
22171#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22172#[cfg_attr(test, assert_instr(vpermt2ps))]
22173pub unsafe fn _mm512_mask_permutex2var_ps(
22174 a: __m512,
22175 k: __mmask16,
22176 idx: __m512i,
22177 b: __m512,
22178) -> __m512 {
22179 let permute: f32x16 = _mm512_permutex2var_ps(a, idx, b).as_f32x16();
22180 transmute(simd_select_bitmask(k, permute, a.as_f32x16()))
22181}
22182
22183/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22184///
22185/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_ps&expand=4285)
22186#[inline]
22187#[target_feature(enable = "avx512f")]
22188#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22189#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
22190pub unsafe fn _mm512_maskz_permutex2var_ps(
22191 k: __mmask16,
22192 a: __m512,
22193 idx: __m512i,
22194 b: __m512,
22195) -> __m512 {
22196 let permute: f32x16 = _mm512_permutex2var_ps(a, idx, b).as_f32x16();
22197 let zero: f32x16 = _mm512_setzero_ps().as_f32x16();
22198 transmute(simd_select_bitmask(k, permute, zero))
22199}
22200
22201/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
22202///
22203/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_ps&expand=4284)
22204#[inline]
22205#[target_feature(enable = "avx512f")]
22206#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22207#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2ps, but it shows vpermt2ps
22208pub unsafe fn _mm512_mask2_permutex2var_ps(
22209 a: __m512,
22210 idx: __m512i,
22211 k: __mmask16,
22212 b: __m512,
22213) -> __m512 {
22214 let permute: f32x16 = _mm512_permutex2var_ps(a, idx, b).as_f32x16();
22215 let idx: f32x16 = _mm512_castsi512_ps(idx).as_f32x16();
22216 transmute(simd_select_bitmask(k, permute, idx))
22217}
22218
22219/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
22220///
22221/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_ps&expand=4282)
22222#[inline]
22223#[target_feature(enable = "avx512f,avx512vl")]
22224#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22225#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
22226pub unsafe fn _mm256_permutex2var_ps(a: __m256, idx: __m256i, b: __m256) -> __m256 {
22227 transmute(vpermi2ps256(a.as_f32x8(), idx.as_i32x8(), b.as_f32x8()))
22228}
22229
22230/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
22231///
22232/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_ps&expand=4279)
22233#[inline]
22234#[target_feature(enable = "avx512f,avx512vl")]
22235#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22236#[cfg_attr(test, assert_instr(vpermt2ps))]
22237pub unsafe fn _mm256_mask_permutex2var_ps(
22238 a: __m256,
22239 k: __mmask8,
22240 idx: __m256i,
22241 b: __m256,
22242) -> __m256 {
22243 let permute: f32x8 = _mm256_permutex2var_ps(a, idx, b).as_f32x8();
22244 transmute(simd_select_bitmask(k, permute, a.as_f32x8()))
22245}
22246
22247/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22248///
22249/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_ps&expand=4281)
22250#[inline]
22251#[target_feature(enable = "avx512f,avx512vl")]
22252#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22253#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
22254pub unsafe fn _mm256_maskz_permutex2var_ps(
22255 k: __mmask8,
22256 a: __m256,
22257 idx: __m256i,
22258 b: __m256,
22259) -> __m256 {
22260 let permute: f32x8 = _mm256_permutex2var_ps(a, idx, b).as_f32x8();
22261 let zero: f32x8 = _mm256_setzero_ps().as_f32x8();
22262 transmute(simd_select_bitmask(k, permute, zero))
22263}
22264
22265/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
22266///
22267/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_ps&expand=4280)
22268#[inline]
22269#[target_feature(enable = "avx512f,avx512vl")]
22270#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22271#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2ps, but it shows vpermt2ps
22272pub unsafe fn _mm256_mask2_permutex2var_ps(
22273 a: __m256,
22274 idx: __m256i,
22275 k: __mmask8,
22276 b: __m256,
22277) -> __m256 {
22278 let permute: f32x8 = _mm256_permutex2var_ps(a, idx, b).as_f32x8();
22279 let idx: f32x8 = _mm256_castsi256_ps(idx).as_f32x8();
22280 transmute(simd_select_bitmask(k, permute, idx))
22281}
22282
22283/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
22284///
22285/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_ps&expand=4278)
22286#[inline]
22287#[target_feature(enable = "avx512f,avx512vl")]
22288#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22289#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
22290pub unsafe fn _mm_permutex2var_ps(a: __m128, idx: __m128i, b: __m128) -> __m128 {
22291 transmute(vpermi2ps128(a.as_f32x4(), idx.as_i32x4(), b.as_f32x4()))
22292}
22293
22294/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
22295///
22296/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_ps&expand=4275)
22297#[inline]
22298#[target_feature(enable = "avx512f,avx512vl")]
22299#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22300#[cfg_attr(test, assert_instr(vpermt2ps))]
22301pub unsafe fn _mm_mask_permutex2var_ps(a: __m128, k: __mmask8, idx: __m128i, b: __m128) -> __m128 {
22302 let permute: f32x4 = _mm_permutex2var_ps(a, idx, b).as_f32x4();
22303 transmute(simd_select_bitmask(k, permute, a.as_f32x4()))
22304}
22305
22306/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22307///
22308/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_ps&expand=4277)
22309#[inline]
22310#[target_feature(enable = "avx512f,avx512vl")]
22311#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22312#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
22313pub unsafe fn _mm_maskz_permutex2var_ps(k: __mmask8, a: __m128, idx: __m128i, b: __m128) -> __m128 {
22314 let permute: f32x4 = _mm_permutex2var_ps(a, idx, b).as_f32x4();
22315 let zero: f32x4 = _mm_setzero_ps().as_f32x4();
22316 transmute(simd_select_bitmask(k, permute, zero))
22317}
22318
22319/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
22320///
22321/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_ps&expand=4276)
22322#[inline]
22323#[target_feature(enable = "avx512f,avx512vl")]
22324#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22325#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2ps, but it shows vpermt2ps
22326pub unsafe fn _mm_mask2_permutex2var_ps(a: __m128, idx: __m128i, k: __mmask8, b: __m128) -> __m128 {
22327 let permute: f32x4 = _mm_permutex2var_ps(a, idx, b).as_f32x4();
22328 let idx: f32x4 = _mm_castsi128_ps(idx).as_f32x4();
22329 transmute(simd_select_bitmask(k, permute, idx))
22330}
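
// Illustrative sketch (hypothetical helper, not part of the crate's API): an
// element-wise blend of two `__m256` vectors with `_mm256_permutex2var_ps`;
// indices with bit 3 set pull from `b`, the rest pull from `a`.
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn example_alternating_blend_ps(a: __m256, b: __m256) -> __m256 {
    // Result: [a[0], b[1], a[2], b[3], a[4], b[5], a[6], b[7]].
    let idx = _mm256_setr_epi32(0, 9, 2, 11, 4, 13, 6, 15);
    _mm256_permutex2var_ps(a, idx, b)
}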
22331
22332/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
22333///
22334/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_pd&expand=4274)
22335#[inline]
22336#[target_feature(enable = "avx512f")]
22337#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22338#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
22339pub unsafe fn _mm512_permutex2var_pd(a: __m512d, idx: __m512i, b: __m512d) -> __m512d {
22340 transmute(vpermi2pd(a.as_f64x8(), idx.as_i64x8(), b.as_f64x8()))
22341}
22342
22343/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
22344///
22345/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_pd&expand=4271)
22346#[inline]
22347#[target_feature(enable = "avx512f")]
22348#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22349#[cfg_attr(test, assert_instr(vpermt2pd))]
22350pub unsafe fn _mm512_mask_permutex2var_pd(
22351 a: __m512d,
22352 k: __mmask8,
22353 idx: __m512i,
22354 b: __m512d,
22355) -> __m512d {
22356 let permute: f64x8 = _mm512_permutex2var_pd(a, idx, b).as_f64x8();
22357 transmute(simd_select_bitmask(k, permute, a.as_f64x8()))
22358}
22359
22360/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22361///
22362/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_pd&expand=4273)
22363#[inline]
22364#[target_feature(enable = "avx512f")]
22365#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22366#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
22367pub unsafe fn _mm512_maskz_permutex2var_pd(
22368 k: __mmask8,
22369 a: __m512d,
22370 idx: __m512i,
22371 b: __m512d,
22372) -> __m512d {
22373 let permute: f64x8 = _mm512_permutex2var_pd(a, idx, b).as_f64x8();
22374 let zero: f64x8 = _mm512_setzero_pd().as_f64x8();
22375 transmute(simd_select_bitmask(k, permute, zero))
22376}
22377
22378/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
22379///
22380/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_pd&expand=4272)
22381#[inline]
22382#[target_feature(enable = "avx512f")]
22383#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22384#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2pd, but it shows vpermt2pd
22385pub unsafe fn _mm512_mask2_permutex2var_pd(
22386 a: __m512d,
22387 idx: __m512i,
22388 k: __mmask8,
22389 b: __m512d,
22390) -> __m512d {
22391 let permute: f64x8 = _mm512_permutex2var_pd(a, idx, b).as_f64x8();
22392 let idx: f64x8 = _mm512_castsi512_pd(idx).as_f64x8();
22393 transmute(simd_select_bitmask(k, permute, idx))
22394}
22395
22396/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
22397///
22398/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_pd&expand=4270)
22399#[inline]
22400#[target_feature(enable = "avx512f,avx512vl")]
22401#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22402#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
22403pub unsafe fn _mm256_permutex2var_pd(a: __m256d, idx: __m256i, b: __m256d) -> __m256d {
22404 transmute(vpermi2pd256(a.as_f64x4(), idx.as_i64x4(), b.as_f64x4()))
22405}
22406
22407/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
22408///
22409/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_pd&expand=4267)
22410#[inline]
22411#[target_feature(enable = "avx512f,avx512vl")]
22412#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22413#[cfg_attr(test, assert_instr(vpermt2pd))]
22414pub unsafe fn _mm256_mask_permutex2var_pd(
22415 a: __m256d,
22416 k: __mmask8,
22417 idx: __m256i,
22418 b: __m256d,
22419) -> __m256d {
22420 let permute: f64x4 = _mm256_permutex2var_pd(a, idx, b).as_f64x4();
22421 transmute(simd_select_bitmask(k, permute, a.as_f64x4()))
22422}
22423
22424/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22425///
22426/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_pd&expand=4269)
22427#[inline]
22428#[target_feature(enable = "avx512f,avx512vl")]
22429#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22430#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
22431pub unsafe fn _mm256_maskz_permutex2var_pd(
22432 k: __mmask8,
22433 a: __m256d,
22434 idx: __m256i,
22435 b: __m256d,
22436) -> __m256d {
22437 let permute: f64x4 = _mm256_permutex2var_pd(a, idx, b).as_f64x4();
22438 let zero: f64x4 = _mm256_setzero_pd().as_f64x4();
22439 transmute(simd_select_bitmask(k, permute, zero))
22440}
22441
22442/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
22443///
22444/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_pd&expand=4268)
22445#[inline]
22446#[target_feature(enable = "avx512f,avx512vl")]
22447#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22448#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2pd, but it shows vpermt2pd
22449pub unsafe fn _mm256_mask2_permutex2var_pd(
22450 a: __m256d,
22451 idx: __m256i,
22452 k: __mmask8,
22453 b: __m256d,
22454) -> __m256d {
22455 let permute: f64x4 = _mm256_permutex2var_pd(a, idx, b).as_f64x4();
22456 let idx: f64x4 = _mm256_castsi256_pd(idx).as_f64x4();
22457 transmute(simd_select_bitmask(k, permute, idx))
22458}
22459
22460/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
22461///
22462/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_pd&expand=4266)
22463#[inline]
22464#[target_feature(enable = "avx512f,avx512vl")]
22465#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22466#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
22467pub unsafe fn _mm_permutex2var_pd(a: __m128d, idx: __m128i, b: __m128d) -> __m128d {
22468 transmute(vpermi2pd128(a.as_f64x2(), idx.as_i64x2(), b.as_f64x2()))
22469}
22470
22471/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
22472///
22473/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_pd&expand=4263)
22474#[inline]
22475#[target_feature(enable = "avx512f,avx512vl")]
22476#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22477#[cfg_attr(test, assert_instr(vpermt2pd))]
22478pub unsafe fn _mm_mask_permutex2var_pd(
22479 a: __m128d,
22480 k: __mmask8,
22481 idx: __m128i,
22482 b: __m128d,
22483) -> __m128d {
22484 let permute: f64x2 = _mm_permutex2var_pd(a, idx, b).as_f64x2();
22485 transmute(simd_select_bitmask(k, permute, a.as_f64x2()))
22486}
22487
22488/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22489///
22490/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_pd&expand=4265)
22491#[inline]
22492#[target_feature(enable = "avx512f,avx512vl")]
22493#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22494#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
22495pub unsafe fn _mm_maskz_permutex2var_pd(
22496 k: __mmask8,
22497 a: __m128d,
22498 idx: __m128i,
22499 b: __m128d,
22500) -> __m128d {
22501 let permute: f64x2 = _mm_permutex2var_pd(a, idx, b).as_f64x2();
22502 let zero: f64x2 = _mm_setzero_pd().as_f64x2();
22503 transmute(simd_select_bitmask(k, permute, zero))
22504}
22505
22506/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
22507///
22508/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_pd&expand=4264)
22509#[inline]
22510#[target_feature(enable = "avx512f,avx512vl")]
22511#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22512#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2pd, but it shows vpermt2pd
22513pub unsafe fn _mm_mask2_permutex2var_pd(
22514 a: __m128d,
22515 idx: __m128i,
22516 k: __mmask8,
22517 b: __m128d,
22518) -> __m128d {
22519 let permute: f64x2 = _mm_permutex2var_pd(a, idx, b).as_f64x2();
22520 let idx: f64x2 = _mm_castsi128_pd(idx).as_f64x2();
22521 transmute(simd_select_bitmask(k, permute, idx))
22522}
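
// Illustrative sketch (hypothetical helper, not part of the crate's API):
// crossing sources with `_mm_permutex2var_pd`, where bit 1 of each 64-bit index
// selects `b` and bit 0 selects the element within the chosen source.
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn example_cross_pick_pd(a: __m128d, b: __m128d) -> __m128d {
    // idx = [2, 1]: result is [b[0], a[1]].
    let idx = _mm_set_epi64x(1, 2);
    _mm_permutex2var_pd(a, idx, b)
}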
22523
22524/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst.
22525///
22526/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_epi32&expand=5150)
22527#[inline]
22528#[target_feature(enable = "avx512f")]
22529#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22530#[cfg_attr(test, assert_instr(vshufps, MASK = 9))] //should be vpshufd
22531#[rustc_legacy_const_generics(1)]
22532pub unsafe fn _mm512_shuffle_epi32<const MASK: _MM_PERM_ENUM>(a: __m512i) -> __m512i {
22533 static_assert_uimm_bits!(MASK, 8);
22534 let r: i32x16 = simd_shuffle!(
22535 a.as_i32x16(),
22536 a.as_i32x16(),
22537 [
22538 MASK as u32 & 0b11,
22539 (MASK as u32 >> 2) & 0b11,
22540 (MASK as u32 >> 4) & 0b11,
22541 (MASK as u32 >> 6) & 0b11,
22542 (MASK as u32 & 0b11) + 4,
22543 ((MASK as u32 >> 2) & 0b11) + 4,
22544 ((MASK as u32 >> 4) & 0b11) + 4,
22545 ((MASK as u32 >> 6) & 0b11) + 4,
22546 (MASK as u32 & 0b11) + 8,
22547 ((MASK as u32 >> 2) & 0b11) + 8,
22548 ((MASK as u32 >> 4) & 0b11) + 8,
22549 ((MASK as u32 >> 6) & 0b11) + 8,
22550 (MASK as u32 & 0b11) + 12,
22551 ((MASK as u32 >> 2) & 0b11) + 12,
22552 ((MASK as u32 >> 4) & 0b11) + 12,
22553 ((MASK as u32 >> 6) & 0b11) + 12,
22554 ],
22555 );
22556 transmute(r)
22557}
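
// Illustrative sketch (hypothetical helper, not part of the crate's API): the
// `MASK` constant is read two bits per destination element and applied to each
// 128-bit lane independently, so a mask of zero broadcasts element 0 in every
// lane (the encoding used by the `_MM_PERM_AAAA` constant).
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_broadcast_in_lane_epi32(a: __m512i) -> __m512i {
    _mm512_shuffle_epi32::<0b00_00_00_00>(a)
}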
22558
22559/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22560///
22561/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_epi32&expand=5148)
22562#[inline]
22563#[target_feature(enable = "avx512f")]
22564#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22565#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
22566#[rustc_legacy_const_generics(3)]
22567pub unsafe fn _mm512_mask_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
22568 src: __m512i,
22569 k: __mmask16,
22570 a: __m512i,
22571) -> __m512i {
22572 static_assert_uimm_bits!(MASK, 8);
22573 let r: __m512i = _mm512_shuffle_epi32::<MASK>(a);
22574 transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16()))
22575}
22576
22577/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22578///
22579/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_epi32&expand=5149)
22580#[inline]
22581#[target_feature(enable = "avx512f")]
22582#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22583#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
22584#[rustc_legacy_const_generics(2)]
22585pub unsafe fn _mm512_maskz_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
22586 k: __mmask16,
22587 a: __m512i,
22588) -> __m512i {
22589 static_assert_uimm_bits!(MASK, 8);
22590 let r: __m512i = _mm512_shuffle_epi32::<MASK>(a);
22591 let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
22592 transmute(simd_select_bitmask(k, r.as_i32x16(), zero))
22593}
22594
22595/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22596///
22597/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_epi32&expand=5145)
22598#[inline]
22599#[target_feature(enable = "avx512f,avx512vl")]
22600#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22601#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
22602#[rustc_legacy_const_generics(3)]
22603pub unsafe fn _mm256_mask_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
22604 src: __m256i,
22605 k: __mmask8,
22606 a: __m256i,
22607) -> __m256i {
22608 static_assert_uimm_bits!(MASK, 8);
22609 let r: __m256i = _mm256_shuffle_epi32::<MASK>(a);
22610 transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8()))
22611}
22612
22613/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22614///
22615/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_epi32&expand=5146)
22616#[inline]
22617#[target_feature(enable = "avx512f,avx512vl")]
22618#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22619#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
22620#[rustc_legacy_const_generics(2)]
22621pub unsafe fn _mm256_maskz_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
22622 k: __mmask8,
22623 a: __m256i,
22624) -> __m256i {
22625 static_assert_uimm_bits!(MASK, 8);
22626 let r: __m256i = _mm256_shuffle_epi32::<MASK>(a);
22627 let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
22628 transmute(simd_select_bitmask(k, r.as_i32x8(), zero))
22629}
22630
22631/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22632///
22633/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shuffle_epi32&expand=5142)
22634#[inline]
22635#[target_feature(enable = "avx512f,avx512vl")]
22636#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22637#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
22638#[rustc_legacy_const_generics(3)]
22639pub unsafe fn _mm_mask_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
22640 src: __m128i,
22641 k: __mmask8,
22642 a: __m128i,
22643) -> __m128i {
22644 static_assert_uimm_bits!(MASK, 8);
22645 let r: __m128i = _mm_shuffle_epi32::<MASK>(a);
22646 transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4()))
22647}
22648
22649/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22650///
22651/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shuffle_epi32&expand=5143)
22652#[inline]
22653#[target_feature(enable = "avx512f,avx512vl")]
22654#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22655#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
22656#[rustc_legacy_const_generics(2)]
22657pub unsafe fn _mm_maskz_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
22658 k: __mmask8,
22659 a: __m128i,
22660) -> __m128i {
22661 static_assert_uimm_bits!(MASK, 8);
22662 let r: __m128i = _mm_shuffle_epi32::<MASK>(a);
22663 let zero: i32x4 = _mm_setzero_si128().as_i32x4();
    transmute(simd_select_bitmask(k, r.as_i32x4(), zero))
22665}
22666
22667/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst.
22668///
22669/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_ps&expand=5203)
22670#[inline]
22671#[target_feature(enable = "avx512f")]
22672#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22673#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
22674#[rustc_legacy_const_generics(2)]
22675pub unsafe fn _mm512_shuffle_ps<const MASK: i32>(a: __m512, b: __m512) -> __m512 {
22676 static_assert_uimm_bits!(MASK, 8);
22677 simd_shuffle!(
22678 a,
22679 b,
22680 [
22681 MASK as u32 & 0b11,
22682 (MASK as u32 >> 2) & 0b11,
22683 ((MASK as u32 >> 4) & 0b11) + 16,
22684 ((MASK as u32 >> 6) & 0b11) + 16,
22685 (MASK as u32 & 0b11) + 4,
22686 ((MASK as u32 >> 2) & 0b11) + 4,
22687 ((MASK as u32 >> 4) & 0b11) + 20,
22688 ((MASK as u32 >> 6) & 0b11) + 20,
22689 (MASK as u32 & 0b11) + 8,
22690 ((MASK as u32 >> 2) & 0b11) + 8,
22691 ((MASK as u32 >> 4) & 0b11) + 24,
22692 ((MASK as u32 >> 6) & 0b11) + 24,
22693 (MASK as u32 & 0b11) + 12,
22694 ((MASK as u32 >> 2) & 0b11) + 12,
22695 ((MASK as u32 >> 4) & 0b11) + 28,
22696 ((MASK as u32 >> 6) & 0b11) + 28,
22697 ],
22698 )
22699}
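
// Illustrative sketch (comment only; values below are assumed): how the 8-bit
// MASK of `_mm512_shuffle_ps` is decoded. Within every 128-bit lane, the two
// low 2-bit fields select (lane-relative) elements of `a` and the two high
// fields select elements of `b`:
//
//     let a = _mm512_setr_ps(
//         0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
//     );
//     let b = _mm512_set1_ps(100.);
//     // MASK = 0b00_00_11_10: each lane becomes (a[2], a[3], b[0], b[0]).
//     let r = _mm512_shuffle_ps::<0b00_00_11_10>(a, b);
//     // lane 0 of r is (2.0, 3.0, 100.0, 100.0), lane 1 is (6.0, 7.0, 100.0, 100.0), ...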
22700
22701/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22702///
22703/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_ps&expand=5201)
22704#[inline]
22705#[target_feature(enable = "avx512f")]
22706#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22707#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
22708#[rustc_legacy_const_generics(4)]
22709pub unsafe fn _mm512_mask_shuffle_ps<const MASK: i32>(
22710 src: __m512,
22711 k: __mmask16,
22712 a: __m512,
22713 b: __m512,
22714) -> __m512 {
22715 static_assert_uimm_bits!(MASK, 8);
22716 let r: __m512 = _mm512_shuffle_ps::<MASK>(a, b);
    transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16()))
22718}
22719
22720/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22721///
22722/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_ps&expand=5202)
22723#[inline]
22724#[target_feature(enable = "avx512f")]
22725#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22726#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
22727#[rustc_legacy_const_generics(3)]
22728pub unsafe fn _mm512_maskz_shuffle_ps<const MASK: i32>(
22729 k: __mmask16,
22730 a: __m512,
22731 b: __m512,
22732) -> __m512 {
22733 static_assert_uimm_bits!(MASK, 8);
22734 let r: __m512 = _mm512_shuffle_ps::<MASK>(a, b);
22735 let zero: f32x16 = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, r.as_f32x16(), zero))
22737}
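
// Usage sketch for the writemask/zeromask variants above (comment only;
// `a` and `b` as in the sketch after `_mm512_shuffle_ps`): bit i of `k`
// chooses between the shuffle result and the fallback value for element i.
//
//     let src = _mm512_set1_ps(-1.0);
//     let k: __mmask16 = 0b0000_0000_0000_0011; // keep only elements 0 and 1
//     let r1 = _mm512_mask_shuffle_ps::<0b00_00_11_10>(src, k, a, b);
//     // elements 2..=15 of r1 are -1.0, copied from `src`
//     let r2 = _mm512_maskz_shuffle_ps::<0b00_00_11_10>(k, a, b);
//     // elements 2..=15 of r2 are 0.0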
22738
22739/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22740///
22741/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_ps&expand=5198)
22742#[inline]
22743#[target_feature(enable = "avx512f,avx512vl")]
22744#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22745#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
22746#[rustc_legacy_const_generics(4)]
22747pub unsafe fn _mm256_mask_shuffle_ps<const MASK: i32>(
22748 src: __m256,
22749 k: __mmask8,
22750 a: __m256,
22751 b: __m256,
22752) -> __m256 {
22753 static_assert_uimm_bits!(MASK, 8);
22754 let r: __m256 = _mm256_shuffle_ps::<MASK>(a, b);
    transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8()))
22756}
22757
22758/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22759///
22760/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_ps&expand=5199)
22761#[inline]
22762#[target_feature(enable = "avx512f,avx512vl")]
22763#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22764#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
22765#[rustc_legacy_const_generics(3)]
22766pub unsafe fn _mm256_maskz_shuffle_ps<const MASK: i32>(
22767 k: __mmask8,
22768 a: __m256,
22769 b: __m256,
22770) -> __m256 {
22771 static_assert_uimm_bits!(MASK, 8);
22772 let r: __m256 = _mm256_shuffle_ps::<MASK>(a, b);
22773 let zero: f32x8 = _mm256_setzero_ps().as_f32x8();
    transmute(simd_select_bitmask(k, r.as_f32x8(), zero))
22775}
22776
22777/// Shuffle single-precision (32-bit) floating-point elements in a using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22778///
22779/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shuffle_ps&expand=5195)
22780#[inline]
22781#[target_feature(enable = "avx512f,avx512vl")]
22782#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22783#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
22784#[rustc_legacy_const_generics(4)]
22785pub unsafe fn _mm_mask_shuffle_ps<const MASK: i32>(
22786 src: __m128,
22787 k: __mmask8,
22788 a: __m128,
22789 b: __m128,
22790) -> __m128 {
22791 static_assert_uimm_bits!(MASK, 8);
22792 let r: __m128 = _mm_shuffle_ps::<MASK>(a, b);
    transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4()))
22794}
22795
22796/// Shuffle single-precision (32-bit) floating-point elements in a using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22797///
22798/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shuffle_ps&expand=5196)
22799#[inline]
22800#[target_feature(enable = "avx512f,avx512vl")]
22801#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22802#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
22803#[rustc_legacy_const_generics(3)]
22804pub unsafe fn _mm_maskz_shuffle_ps<const MASK: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
22805 static_assert_uimm_bits!(MASK, 8);
22806 let r: __m128 = _mm_shuffle_ps::<MASK>(a, b);
22807 let zero: f32x4 = _mm_setzero_ps().as_f32x4();
    transmute(simd_select_bitmask(k, r.as_f32x4(), zero))
22809}
22810
22811/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst.
22812///
22813/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_pd&expand=5192)
22814#[inline]
22815#[target_feature(enable = "avx512f")]
22816#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22817#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
22818#[rustc_legacy_const_generics(2)]
22819pub unsafe fn _mm512_shuffle_pd<const MASK: i32>(a: __m512d, b: __m512d) -> __m512d {
22820 static_assert_uimm_bits!(MASK, 8);
22821 simd_shuffle!(
22822 a,
22823 b,
22824 [
22825 MASK as u32 & 0b1,
22826 ((MASK as u32 >> 1) & 0b1) + 8,
22827 ((MASK as u32 >> 2) & 0b1) + 2,
22828 ((MASK as u32 >> 3) & 0b1) + 10,
22829 ((MASK as u32 >> 4) & 0b1) + 4,
22830 ((MASK as u32 >> 5) & 0b1) + 12,
22831 ((MASK as u32 >> 6) & 0b1) + 6,
22832 ((MASK as u32 >> 7) & 0b1) + 14,
22833 ],
22834 )
22835}
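
// Illustrative sketch (comment only; assumed inputs): `_mm512_shuffle_pd`
// consumes one MASK bit per output element, alternating between `a` (even
// positions) and `b` (odd positions) within each 128-bit lane.
//
//     let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
//     let b = _mm512_set1_pd(100.);
//     // MASK = 0b0000_0001: only element 0 takes the high half of its lane.
//     let r = _mm512_shuffle_pd::<0b0000_0001>(a, b);
//     // r = (1.0, 100.0, 2.0, 100.0, 4.0, 100.0, 6.0, 100.0)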
22836
22837/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22838///
22839/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_pd&expand=5190)
22840#[inline]
22841#[target_feature(enable = "avx512f")]
22842#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22843#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
22844#[rustc_legacy_const_generics(4)]
22845pub unsafe fn _mm512_mask_shuffle_pd<const MASK: i32>(
22846 src: __m512d,
22847 k: __mmask8,
22848 a: __m512d,
22849 b: __m512d,
22850) -> __m512d {
22851 static_assert_uimm_bits!(MASK, 8);
22852 let r: __m512d = _mm512_shuffle_pd::<MASK>(a, b);
    transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
22854}
22855
22856/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22857///
22858/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_pd&expand=5191)
22859#[inline]
22860#[target_feature(enable = "avx512f")]
22861#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22862#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
22863#[rustc_legacy_const_generics(3)]
22864pub unsafe fn _mm512_maskz_shuffle_pd<const MASK: i32>(
22865 k: __mmask8,
22866 a: __m512d,
22867 b: __m512d,
22868) -> __m512d {
22869 static_assert_uimm_bits!(MASK, 8);
22870 let r: __m512d = _mm512_shuffle_pd::<MASK>(a, b);
22871 let zero: f64x8 = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, r.as_f64x8(), zero))
22873}
22874
22875/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22876///
22877/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_pd&expand=5187)
22878#[inline]
22879#[target_feature(enable = "avx512f,avx512vl")]
22880#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22881#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
22882#[rustc_legacy_const_generics(4)]
22883pub unsafe fn _mm256_mask_shuffle_pd<const MASK: i32>(
22884 src: __m256d,
22885 k: __mmask8,
22886 a: __m256d,
22887 b: __m256d,
22888) -> __m256d {
22889 static_assert_uimm_bits!(MASK, 8);
22890 let r: __m256d = _mm256_shuffle_pd::<MASK>(a, b);
    transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
22892}
22893
22894/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22895///
22896/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_pd&expand=5188)
22897#[inline]
22898#[target_feature(enable = "avx512f,avx512vl")]
22899#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22900#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
22901#[rustc_legacy_const_generics(3)]
22902pub unsafe fn _mm256_maskz_shuffle_pd<const MASK: i32>(
22903 k: __mmask8,
22904 a: __m256d,
22905 b: __m256d,
22906) -> __m256d {
22907 static_assert_uimm_bits!(MASK, 8);
22908 let r: __m256d = _mm256_shuffle_pd::<MASK>(a, b);
22909 let zero: f64x4 = _mm256_setzero_pd().as_f64x4();
    transmute(simd_select_bitmask(k, r.as_f64x4(), zero))
22911}
22912
22913/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22914///
22915/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shuffle_pd&expand=5184)
22916#[inline]
22917#[target_feature(enable = "avx512f,avx512vl")]
22918#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22919#[cfg_attr(test, assert_instr(vshufpd, MASK = 1))]
22920#[rustc_legacy_const_generics(4)]
22921pub unsafe fn _mm_mask_shuffle_pd<const MASK: i32>(
22922 src: __m128d,
22923 k: __mmask8,
22924 a: __m128d,
22925 b: __m128d,
22926) -> __m128d {
22927 static_assert_uimm_bits!(MASK, 8);
22928 let r: __m128d = _mm_shuffle_pd::<MASK>(a, b);
    transmute(simd_select_bitmask(k, r.as_f64x2(), src.as_f64x2()))
22930}
22931
22932/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22933///
22934/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shuffle_pd&expand=5185)
22935#[inline]
22936#[target_feature(enable = "avx512f,avx512vl")]
22937#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22938#[cfg_attr(test, assert_instr(vshufpd, MASK = 1))]
22939#[rustc_legacy_const_generics(3)]
22940pub unsafe fn _mm_maskz_shuffle_pd<const MASK: i32>(
22941 k: __mmask8,
22942 a: __m128d,
22943 b: __m128d,
22944) -> __m128d {
22945 static_assert_uimm_bits!(MASK, 8);
22946 let r: __m128d = _mm_shuffle_pd::<MASK>(a, b);
22947 let zero: f64x2 = _mm_setzero_pd().as_f64x2();
    transmute(simd_select_bitmask(k, r.as_f64x2(), zero))
22949}
22950
22951/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst.
22952///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_i32x4&expand=5177)
22954#[inline]
22955#[target_feature(enable = "avx512f")]
22956#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22957#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_01_01_01))] //should be vshufi32x4
22958#[rustc_legacy_const_generics(2)]
22959pub unsafe fn _mm512_shuffle_i32x4<const MASK: i32>(a: __m512i, b: __m512i) -> __m512i {
22960 static_assert_uimm_bits!(MASK, 8);
22961 let a = a.as_i32x16();
22962 let b = b.as_i32x16();
22963 let r: i32x16 = simd_shuffle!(
22964 a,
22965 b,
22966 [
22967 (MASK as u32 & 0b11) * 4 + 0,
22968 (MASK as u32 & 0b11) * 4 + 1,
22969 (MASK as u32 & 0b11) * 4 + 2,
22970 (MASK as u32 & 0b11) * 4 + 3,
22971 ((MASK as u32 >> 2) & 0b11) * 4 + 0,
22972 ((MASK as u32 >> 2) & 0b11) * 4 + 1,
22973 ((MASK as u32 >> 2) & 0b11) * 4 + 2,
22974 ((MASK as u32 >> 2) & 0b11) * 4 + 3,
22975 ((MASK as u32 >> 4) & 0b11) * 4 + 0 + 16,
22976 ((MASK as u32 >> 4) & 0b11) * 4 + 1 + 16,
22977 ((MASK as u32 >> 4) & 0b11) * 4 + 2 + 16,
22978 ((MASK as u32 >> 4) & 0b11) * 4 + 3 + 16,
22979 ((MASK as u32 >> 6) & 0b11) * 4 + 0 + 16,
22980 ((MASK as u32 >> 6) & 0b11) * 4 + 1 + 16,
22981 ((MASK as u32 >> 6) & 0b11) * 4 + 2 + 16,
22982 ((MASK as u32 >> 6) & 0b11) * 4 + 3 + 16,
22983 ],
22984 );
22985 transmute(r)
22986}
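
// Illustrative sketch (comment only; assumed inputs): `_mm512_shuffle_i32x4`
// moves whole 128-bit lanes. The two low 2-bit fields of MASK pick lanes of
// `a` for the lower half of the result; the two high fields pick lanes of `b`
// for the upper half.
//
//     let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
//     let b = _mm512_set1_epi32(-1);
//     // MASK = 0b00_00_01_11: lanes are a-lane 3, a-lane 1, b-lane 0, b-lane 0.
//     let r = _mm512_shuffle_i32x4::<0b00_00_01_11>(a, b);
//     // r = (12, 13, 14, 15, 4, 5, 6, 7, -1, -1, -1, -1, -1, -1, -1, -1)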
22987
22988/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22989///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_i32x4&expand=5175)
22991#[inline]
22992#[target_feature(enable = "avx512f")]
22993#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22994#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b10_11_01_01))]
22995#[rustc_legacy_const_generics(4)]
22996pub unsafe fn _mm512_mask_shuffle_i32x4<const MASK: i32>(
22997 src: __m512i,
22998 k: __mmask16,
22999 a: __m512i,
23000 b: __m512i,
23001) -> __m512i {
23002 static_assert_uimm_bits!(MASK, 8);
23003 let r: __m512i = _mm512_shuffle_i32x4::<MASK>(a, b);
    transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16()))
23005}
23006
23007/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23008///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_i32x4&expand=5176)
23010#[inline]
23011#[target_feature(enable = "avx512f")]
23012#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23013#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b10_11_01_01))]
23014#[rustc_legacy_const_generics(3)]
23015pub unsafe fn _mm512_maskz_shuffle_i32x4<const MASK: i32>(
23016 k: __mmask16,
23017 a: __m512i,
23018 b: __m512i,
23019) -> __m512i {
23020 static_assert_uimm_bits!(MASK, 8);
23021 let r: __m512i = _mm512_shuffle_i32x4::<MASK>(a, b);
23022 let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, r.as_i32x16(), zero))
23024}
23025
23026/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst.
23027///
23028/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_i32x4&expand=5174)
23029#[inline]
23030#[target_feature(enable = "avx512f,avx512vl")]
23031#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23032#[cfg_attr(test, assert_instr(vperm, MASK = 0b11))] //should be vshufi32x4
23033#[rustc_legacy_const_generics(2)]
23034pub unsafe fn _mm256_shuffle_i32x4<const MASK: i32>(a: __m256i, b: __m256i) -> __m256i {
23035 static_assert_uimm_bits!(MASK, 8);
23036 let a: i32x8 = a.as_i32x8();
23037 let b: i32x8 = b.as_i32x8();
23038 let r: i32x8 = simd_shuffle!(
23039 a,
23040 b,
23041 [
23042 (MASK as u32 & 0b1) * 4 + 0,
23043 (MASK as u32 & 0b1) * 4 + 1,
23044 (MASK as u32 & 0b1) * 4 + 2,
23045 (MASK as u32 & 0b1) * 4 + 3,
23046 ((MASK as u32 >> 1) & 0b1) * 4 + 0 + 8,
23047 ((MASK as u32 >> 1) & 0b1) * 4 + 1 + 8,
23048 ((MASK as u32 >> 1) & 0b1) * 4 + 2 + 8,
23049 ((MASK as u32 >> 1) & 0b1) * 4 + 3 + 8,
23050 ],
23051 );
    transmute(r)
23053}
23054
23055/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23056///
23057/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_i32x4&expand=5172)
23058#[inline]
23059#[target_feature(enable = "avx512f,avx512vl")]
23060#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23061#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b11))]
23062#[rustc_legacy_const_generics(4)]
23063pub unsafe fn _mm256_mask_shuffle_i32x4<const MASK: i32>(
23064 src: __m256i,
23065 k: __mmask8,
23066 a: __m256i,
23067 b: __m256i,
23068) -> __m256i {
23069 static_assert_uimm_bits!(MASK, 8);
23070 let r: __m256i = _mm256_shuffle_i32x4::<MASK>(a, b);
    transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8()))
23072}
23073
23074/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23075///
23076/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_i32x4&expand=5173)
23077#[inline]
23078#[target_feature(enable = "avx512f,avx512vl")]
23079#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23080#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b11))]
23081#[rustc_legacy_const_generics(3)]
23082pub unsafe fn _mm256_maskz_shuffle_i32x4<const MASK: i32>(
23083 k: __mmask8,
23084 a: __m256i,
23085 b: __m256i,
23086) -> __m256i {
23087 static_assert_uimm_bits!(MASK, 8);
23088 let r: __m256i = _mm256_shuffle_i32x4::<MASK>(a, b);
23089 let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
    transmute(simd_select_bitmask(k, r.as_i32x8(), zero))
23091}
23092
23093/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst.
23094///
23095/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_i64x2&expand=5183)
23096#[inline]
23097#[target_feature(enable = "avx512f")]
23098#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23099#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_11_11_11))]
23100#[rustc_legacy_const_generics(2)]
23101pub unsafe fn _mm512_shuffle_i64x2<const MASK: i32>(a: __m512i, b: __m512i) -> __m512i {
23102 static_assert_uimm_bits!(MASK, 8);
23103 let a: i64x8 = a.as_i64x8();
23104 let b: i64x8 = b.as_i64x8();
23105 let r: i64x8 = simd_shuffle!(
23106 a,
23107 b,
23108 [
23109 (MASK as u32 & 0b11) * 2 + 0,
23110 (MASK as u32 & 0b11) * 2 + 1,
23111 ((MASK as u32 >> 2) & 0b11) * 2 + 0,
23112 ((MASK as u32 >> 2) & 0b11) * 2 + 1,
23113 ((MASK as u32 >> 4) & 0b11) * 2 + 0 + 8,
23114 ((MASK as u32 >> 4) & 0b11) * 2 + 1 + 8,
23115 ((MASK as u32 >> 6) & 0b11) * 2 + 0 + 8,
23116 ((MASK as u32 >> 6) & 0b11) * 2 + 1 + 8,
23117 ],
23118 );
    transmute(r)
23120}
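
// Illustrative sketch (comment only; assumed inputs): same lane-selection
// scheme as `_mm512_shuffle_i32x4`, but each 128-bit lane holds two 64-bit
// integers.
//
//     let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
//     let b = _mm512_set1_epi64(-1);
//     // MASK = 0b00_00_00_10: lanes are a-lane 2, a-lane 0, b-lane 0, b-lane 0.
//     let r = _mm512_shuffle_i64x2::<0b00_00_00_10>(a, b);
//     // r = (4, 5, 0, 1, -1, -1, -1, -1)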
23121
23122/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23123///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_i64x2&expand=5181)
23125#[inline]
23126#[target_feature(enable = "avx512f")]
23127#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23128#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_11_11_11))]
23129#[rustc_legacy_const_generics(4)]
23130pub unsafe fn _mm512_mask_shuffle_i64x2<const MASK: i32>(
23131 src: __m512i,
23132 k: __mmask8,
23133 a: __m512i,
23134 b: __m512i,
23135) -> __m512i {
23136 static_assert_uimm_bits!(MASK, 8);
23137 let r: __m512i = _mm512_shuffle_i64x2::<MASK>(a, b);
    transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8()))
23139}
23140
23141/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23142///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_i64x2&expand=5182)
23144#[inline]
23145#[target_feature(enable = "avx512f")]
23146#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23147#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_11_11_11))]
23148#[rustc_legacy_const_generics(3)]
23149pub unsafe fn _mm512_maskz_shuffle_i64x2<const MASK: i32>(
23150 k: __mmask8,
23151 a: __m512i,
23152 b: __m512i,
23153) -> __m512i {
23154 static_assert_uimm_bits!(MASK, 8);
23155 let r: __m512i = _mm512_shuffle_i64x2::<MASK>(a, b);
23156 let zero: i64x8 = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, r.as_i64x8(), zero))
23158}
23159
23160/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst.
23161///
23162/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_i64x2&expand=5180)
23163#[inline]
23164#[target_feature(enable = "avx512f,avx512vl")]
23165#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23166#[cfg_attr(test, assert_instr(vperm, MASK = 0b01))] //should be vshufi64x2
23167#[rustc_legacy_const_generics(2)]
23168pub unsafe fn _mm256_shuffle_i64x2<const MASK: i32>(a: __m256i, b: __m256i) -> __m256i {
23169 static_assert_uimm_bits!(MASK, 8);
23170 let a: i64x4 = a.as_i64x4();
23171 let b: i64x4 = b.as_i64x4();
23172 let r: i64x4 = simd_shuffle!(
23173 a,
23174 b,
23175 [
23176 (MASK as u32 & 0b1) * 2 + 0,
23177 (MASK as u32 & 0b1) * 2 + 1,
23178 ((MASK as u32 >> 1) & 0b1) * 2 + 0 + 4,
23179 ((MASK as u32 >> 1) & 0b1) * 2 + 1 + 4,
23180 ],
23181 );
    transmute(r)
23183}
23184
23185/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23186///
23187/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_i64x2&expand=5178)
23188#[inline]
23189#[target_feature(enable = "avx512f,avx512vl")]
23190#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23191#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b11))]
23192#[rustc_legacy_const_generics(4)]
23193pub unsafe fn _mm256_mask_shuffle_i64x2<const MASK: i32>(
23194 src: __m256i,
23195 k: __mmask8,
23196 a: __m256i,
23197 b: __m256i,
23198) -> __m256i {
23199 static_assert_uimm_bits!(MASK, 8);
23200 let r: __m256i = _mm256_shuffle_i64x2::<MASK>(a, b);
    transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4()))
23202}
23203
23204/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23205///
23206/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_i64x2&expand=5179)
23207#[inline]
23208#[target_feature(enable = "avx512f,avx512vl")]
23209#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23210#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b11))]
23211#[rustc_legacy_const_generics(3)]
23212pub unsafe fn _mm256_maskz_shuffle_i64x2<const MASK: i32>(
23213 k: __mmask8,
23214 a: __m256i,
23215 b: __m256i,
23216) -> __m256i {
23217 static_assert_uimm_bits!(MASK, 8);
23218 let r: __m256i = _mm256_shuffle_i64x2::<MASK>(a, b);
23219 let zero: i64x4 = _mm256_setzero_si256().as_i64x4();
    transmute(simd_select_bitmask(k, r.as_i64x4(), zero))
23221}
23222
23223/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
23224///
23225/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_f32x4&expand=5165)
23226#[inline]
23227#[target_feature(enable = "avx512f")]
23228#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b1011))] // should be vshuff32x4, but generates vshuff64x2
23230#[rustc_legacy_const_generics(2)]
23231pub unsafe fn _mm512_shuffle_f32x4<const MASK: i32>(a: __m512, b: __m512) -> __m512 {
23232 static_assert_uimm_bits!(MASK, 8);
23233 let a = a.as_f32x16();
23234 let b = b.as_f32x16();
23235 let r: f32x16 = simd_shuffle!(
23236 a,
23237 b,
23238 [
23239 (MASK as u32 & 0b11) * 4 + 0,
23240 (MASK as u32 & 0b11) * 4 + 1,
23241 (MASK as u32 & 0b11) * 4 + 2,
23242 (MASK as u32 & 0b11) * 4 + 3,
23243 ((MASK as u32 >> 2) & 0b11) * 4 + 0,
23244 ((MASK as u32 >> 2) & 0b11) * 4 + 1,
23245 ((MASK as u32 >> 2) & 0b11) * 4 + 2,
23246 ((MASK as u32 >> 2) & 0b11) * 4 + 3,
23247 ((MASK as u32 >> 4) & 0b11) * 4 + 0 + 16,
23248 ((MASK as u32 >> 4) & 0b11) * 4 + 1 + 16,
23249 ((MASK as u32 >> 4) & 0b11) * 4 + 2 + 16,
23250 ((MASK as u32 >> 4) & 0b11) * 4 + 3 + 16,
23251 ((MASK as u32 >> 6) & 0b11) * 4 + 0 + 16,
23252 ((MASK as u32 >> 6) & 0b11) * 4 + 1 + 16,
23253 ((MASK as u32 >> 6) & 0b11) * 4 + 2 + 16,
23254 ((MASK as u32 >> 6) & 0b11) * 4 + 3 + 16,
23255 ],
23256 );
23257 transmute(r)
23258}
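
// Usage sketch (comment only; assumed inputs): the floating-point counterpart
// of `_mm512_shuffle_i32x4`; MASK is decoded the same way.
//
//     let a = _mm512_setr_ps(
//         0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
//     );
//     let b = _mm512_set1_ps(-0.5);
//     // MASK = 0b00_00_01_00: lanes are a-lane 0, a-lane 1, b-lane 0, b-lane 0.
//     let r = _mm512_shuffle_f32x4::<0b00_00_01_00>(a, b);
//     // r = (0.0, 1.0, ..., 7.0 from a, then eight copies of -0.5)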
23259
23260/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23261///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_f32x4&expand=5163)
23263#[inline]
23264#[target_feature(enable = "avx512f")]
23265#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23266#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b1011))]
23267#[rustc_legacy_const_generics(4)]
23268pub unsafe fn _mm512_mask_shuffle_f32x4<const MASK: i32>(
23269 src: __m512,
23270 k: __mmask16,
23271 a: __m512,
23272 b: __m512,
23273) -> __m512 {
23274 static_assert_uimm_bits!(MASK, 8);
23275 let r: __m512 = _mm512_shuffle_f32x4::<MASK>(a, b);
    transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16()))
23277}
23278
23279/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23280///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_f32x4&expand=5164)
23282#[inline]
23283#[target_feature(enable = "avx512f")]
23284#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23285#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b1011))]
23286#[rustc_legacy_const_generics(3)]
23287pub unsafe fn _mm512_maskz_shuffle_f32x4<const MASK: i32>(
23288 k: __mmask16,
23289 a: __m512,
23290 b: __m512,
23291) -> __m512 {
23292 static_assert_uimm_bits!(MASK, 8);
23293 let r: __m512 = _mm512_shuffle_f32x4::<MASK>(a, b);
23294 let zero: f32x16 = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, r.as_f32x16(), zero))
23296}
23297
23298/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
23299///
23300/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_f32x4&expand=5162)
23301#[inline]
23302#[target_feature(enable = "avx512f,avx512vl")]
23303#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23304#[cfg_attr(test, assert_instr(vperm, MASK = 0b01))] //should be vshuff32x4
23305#[rustc_legacy_const_generics(2)]
23306pub unsafe fn _mm256_shuffle_f32x4<const MASK: i32>(a: __m256, b: __m256) -> __m256 {
23307 static_assert_uimm_bits!(MASK, 8);
23308 let a: f32x8 = a.as_f32x8();
23309 let b: f32x8 = b.as_f32x8();
23310 let r: f32x8 = simd_shuffle!(
23311 a,
23312 b,
23313 [
23314 (MASK as u32 & 0b1) * 4 + 0,
23315 (MASK as u32 & 0b1) * 4 + 1,
23316 (MASK as u32 & 0b1) * 4 + 2,
23317 (MASK as u32 & 0b1) * 4 + 3,
23318 ((MASK as u32 >> 1) & 0b1) * 4 + 0 + 8,
23319 ((MASK as u32 >> 1) & 0b1) * 4 + 1 + 8,
23320 ((MASK as u32 >> 1) & 0b1) * 4 + 2 + 8,
23321 ((MASK as u32 >> 1) & 0b1) * 4 + 3 + 8,
23322 ],
23323 );
    transmute(r)
23325}
23326
23327/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23328///
23329/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_f32x4&expand=5160)
23330#[inline]
23331#[target_feature(enable = "avx512f,avx512vl")]
23332#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23333#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b11))]
23334#[rustc_legacy_const_generics(4)]
23335pub unsafe fn _mm256_mask_shuffle_f32x4<const MASK: i32>(
23336 src: __m256,
23337 k: __mmask8,
23338 a: __m256,
23339 b: __m256,
23340) -> __m256 {
23341 static_assert_uimm_bits!(MASK, 8);
23342 let r: __m256 = _mm256_shuffle_f32x4::<MASK>(a, b);
    transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8()))
23344}
23345
23346/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23347///
23348/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_f32x4&expand=5161)
23349#[inline]
23350#[target_feature(enable = "avx512f,avx512vl")]
23351#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23352#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b11))]
23353#[rustc_legacy_const_generics(3)]
23354pub unsafe fn _mm256_maskz_shuffle_f32x4<const MASK: i32>(
23355 k: __mmask8,
23356 a: __m256,
23357 b: __m256,
23358) -> __m256 {
23359 static_assert_uimm_bits!(MASK, 8);
23360 let r: __m256 = _mm256_shuffle_f32x4::<MASK>(a, b);
23361 let zero: f32x8 = _mm256_setzero_ps().as_f32x8();
    transmute(simd_select_bitmask(k, r.as_f32x8(), zero))
23363}
23364
23365/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
23366///
23367/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_f64x2&expand=5171)
23368#[inline]
23369#[target_feature(enable = "avx512f")]
23370#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23371#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b10_11_11_11))]
23372#[rustc_legacy_const_generics(2)]
23373pub unsafe fn _mm512_shuffle_f64x2<const MASK: i32>(a: __m512d, b: __m512d) -> __m512d {
23374 static_assert_uimm_bits!(MASK, 8);
23375 let a: f64x8 = a.as_f64x8();
23376 let b: f64x8 = b.as_f64x8();
23377 let r: f64x8 = simd_shuffle!(
23378 a,
23379 b,
23380 [
23381 (MASK as u32 & 0b11) * 2 + 0,
23382 (MASK as u32 & 0b11) * 2 + 1,
23383 ((MASK as u32 >> 2) & 0b11) * 2 + 0,
23384 ((MASK as u32 >> 2) & 0b11) * 2 + 1,
23385 ((MASK as u32 >> 4) & 0b11) * 2 + 0 + 8,
23386 ((MASK as u32 >> 4) & 0b11) * 2 + 1 + 8,
23387 ((MASK as u32 >> 6) & 0b11) * 2 + 0 + 8,
23388 ((MASK as u32 >> 6) & 0b11) * 2 + 1 + 8,
23389 ],
23390 );
    transmute(r)
23392}
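
// Usage sketch (comment only; assumed inputs): lane selection identical to
// `_mm512_shuffle_i64x2`, operating on pairs of f64.
//
//     let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
//     let b = _mm512_set1_pd(9.);
//     // MASK = 0b01_00_11_01: lanes are a-lane 1, a-lane 3, b-lane 0, b-lane 1.
//     let r = _mm512_shuffle_f64x2::<0b01_00_11_01>(a, b);
//     // r = (2.0, 3.0, 6.0, 7.0, 9.0, 9.0, 9.0, 9.0)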
23393
23394/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23395///
23396/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_f64x2&expand=5169)
23397#[inline]
23398#[target_feature(enable = "avx512f")]
23399#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23400#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b10_11_11_11))]
23401#[rustc_legacy_const_generics(4)]
23402pub unsafe fn _mm512_mask_shuffle_f64x2<const MASK: i32>(
23403 src: __m512d,
23404 k: __mmask8,
23405 a: __m512d,
23406 b: __m512d,
23407) -> __m512d {
23408 static_assert_uimm_bits!(MASK, 8);
23409 let r: __m512d = _mm512_shuffle_f64x2::<MASK>(a, b);
    transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
23411}
23412
23413/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23414///
23415/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_f64x2&expand=5170)
23416#[inline]
23417#[target_feature(enable = "avx512f")]
23418#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23419#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b10_11_11_11))]
23420#[rustc_legacy_const_generics(3)]
23421pub unsafe fn _mm512_maskz_shuffle_f64x2<const MASK: i32>(
23422 k: __mmask8,
23423 a: __m512d,
23424 b: __m512d,
23425) -> __m512d {
23426 static_assert_uimm_bits!(MASK, 8);
23427 let r: __m512d = _mm512_shuffle_f64x2::<MASK>(a, b);
23428 let zero: f64x8 = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, r.as_f64x8(), zero))
23430}
23431
23432/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
23433///
23434/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_f64x2&expand=5168)
23435#[inline]
23436#[target_feature(enable = "avx512f,avx512vl")]
23437#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23438#[cfg_attr(test, assert_instr(vperm, MASK = 0b01))] //should be vshuff64x2
23439#[rustc_legacy_const_generics(2)]
23440pub unsafe fn _mm256_shuffle_f64x2<const MASK: i32>(a: __m256d, b: __m256d) -> __m256d {
23441 static_assert_uimm_bits!(MASK, 8);
23442 let a: f64x4 = a.as_f64x4();
23443 let b: f64x4 = b.as_f64x4();
23444 let r: f64x4 = simd_shuffle!(
23445 a,
23446 b,
23447 [
23448 (MASK as u32 & 0b1) * 2 + 0,
23449 (MASK as u32 & 0b1) * 2 + 1,
23450 ((MASK as u32 >> 1) & 0b1) * 2 + 0 + 4,
23451 ((MASK as u32 >> 1) & 0b1) * 2 + 1 + 4,
23452 ],
23453 );
    transmute(r)
23455}
23456
23457/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23458///
23459/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_f64x2&expand=5166)
23460#[inline]
23461#[target_feature(enable = "avx512f,avx512vl")]
23462#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23463#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b11))]
23464#[rustc_legacy_const_generics(4)]
23465pub unsafe fn _mm256_mask_shuffle_f64x2<const MASK: i32>(
23466 src: __m256d,
23467 k: __mmask8,
23468 a: __m256d,
23469 b: __m256d,
23470) -> __m256d {
23471 static_assert_uimm_bits!(MASK, 8);
23472 let r: __m256d = _mm256_shuffle_f64x2::<MASK>(a, b);
    transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
23474}
23475
23476/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23477///
23478/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_f64x2&expand=5167)
23479#[inline]
23480#[target_feature(enable = "avx512f,avx512vl")]
23481#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23482#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b11))]
23483#[rustc_legacy_const_generics(3)]
23484pub unsafe fn _mm256_maskz_shuffle_f64x2<const MASK: i32>(
23485 k: __mmask8,
23486 a: __m256d,
23487 b: __m256d,
23488) -> __m256d {
23489 static_assert_uimm_bits!(MASK, 8);
23490 let r: __m256d = _mm256_shuffle_f64x2::<MASK>(a, b);
23491 let zero: f64x4 = _mm256_setzero_pd().as_f64x4();
    transmute(simd_select_bitmask(k, r.as_f64x4(), zero))
23493}
23494
23495/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the result in dst.
23496///
23497/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extractf32x4_ps&expand=2442)
23498#[inline]
23499#[target_feature(enable = "avx512f")]
23500#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23501#[cfg_attr(
23502 all(test, not(target_os = "windows")),
23503 assert_instr(vextractf32x4, IMM8 = 3)
23504)]
23505#[rustc_legacy_const_generics(1)]
23506pub unsafe fn _mm512_extractf32x4_ps<const IMM8: i32>(a: __m512) -> __m128 {
23507 static_assert_uimm_bits!(IMM8, 2);
23508 match IMM8 & 0x3 {
23509 0 => simd_shuffle!(a, _mm512_undefined_ps(), [0, 1, 2, 3]),
23510 1 => simd_shuffle!(a, _mm512_undefined_ps(), [4, 5, 6, 7]),
23511 2 => simd_shuffle!(a, _mm512_undefined_ps(), [8, 9, 10, 11]),
23512 _ => simd_shuffle!(a, _mm512_undefined_ps(), [12, 13, 14, 15]),
23513 }
23514}
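
// Usage sketch (comment only; assumed input): IMM8 selects which of the four
// 128-bit lanes of `a` becomes the `__m128` result.
//
//     let a = _mm512_setr_ps(
//         0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
//     );
//     let lo = _mm512_extractf32x4_ps::<0>(a); // (0.0, 1.0, 2.0, 3.0)
//     let hi = _mm512_extractf32x4_ps::<3>(a); // (12.0, 13.0, 14.0, 15.0)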
23515
23516/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23517///
23518/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extractf32x4_ps&expand=2443)
23519#[inline]
23520#[target_feature(enable = "avx512f")]
23521#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23522#[cfg_attr(
23523 all(test, not(target_os = "windows")),
23524 assert_instr(vextractf32x4, IMM8 = 3)
23525)]
23526#[rustc_legacy_const_generics(3)]
23527pub unsafe fn _mm512_mask_extractf32x4_ps<const IMM8: i32>(
23528 src: __m128,
23529 k: __mmask8,
23530 a: __m512,
23531) -> __m128 {
23532 static_assert_uimm_bits!(IMM8, 2);
23533 let r: __m128 = _mm512_extractf32x4_ps::<IMM8>(a);
    transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4()))
23535}
23536
23537/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23538///
23539/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extractf32x4_ps&expand=2444)
23540#[inline]
23541#[target_feature(enable = "avx512f")]
23542#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23543#[cfg_attr(
23544 all(test, not(target_os = "windows")),
23545 assert_instr(vextractf32x4, IMM8 = 3)
23546)]
23547#[rustc_legacy_const_generics(2)]
23548pub unsafe fn _mm512_maskz_extractf32x4_ps<const IMM8: i32>(k: __mmask8, a: __m512) -> __m128 {
23549 static_assert_uimm_bits!(IMM8, 2);
23550 let r: __m128 = _mm512_extractf32x4_ps::<IMM8>(a);
23551 let zero: f32x4 = _mm_setzero_ps().as_f32x4();
    transmute(simd_select_bitmask(k, r.as_f32x4(), zero))
23553}
23554
23555/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the result in dst.
23556///
23557/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extractf32x4_ps&expand=2439)
23558#[inline]
23559#[target_feature(enable = "avx512f,avx512vl")]
23560#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23561#[cfg_attr(
23562 all(test, not(target_os = "windows")),
23563 assert_instr(vextract, IMM8 = 1) //should be vextractf32x4
23564)]
23565#[rustc_legacy_const_generics(1)]
23566pub unsafe fn _mm256_extractf32x4_ps<const IMM8: i32>(a: __m256) -> __m128 {
23567 static_assert_uimm_bits!(IMM8, 1);
23568 match IMM8 & 0x1 {
23569 0 => simd_shuffle!(a, _mm256_undefined_ps(), [0, 1, 2, 3]),
23570 _ => simd_shuffle!(a, _mm256_undefined_ps(), [4, 5, 6, 7]),
23571 }
23572}
23573
23574/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23575///
23576/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_extractf32x4_ps&expand=2440)
23577#[inline]
23578#[target_feature(enable = "avx512f,avx512vl")]
23579#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23580#[cfg_attr(
23581 all(test, not(target_os = "windows")),
23582 assert_instr(vextractf32x4, IMM8 = 1)
23583)]
23584#[rustc_legacy_const_generics(3)]
23585pub unsafe fn _mm256_mask_extractf32x4_ps<const IMM8: i32>(
23586 src: __m128,
23587 k: __mmask8,
23588 a: __m256,
23589) -> __m128 {
23590 static_assert_uimm_bits!(IMM8, 1);
23591 let r: __m128 = _mm256_extractf32x4_ps::<IMM8>(a);
    transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4()))
23593}
23594
23595/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23596///
23597/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_extractf32x4_ps&expand=2441)
23598#[inline]
23599#[target_feature(enable = "avx512f,avx512vl")]
23600#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23601#[cfg_attr(
23602 all(test, not(target_os = "windows")),
23603 assert_instr(vextractf32x4, IMM8 = 1)
23604)]
23605#[rustc_legacy_const_generics(2)]
23606pub unsafe fn _mm256_maskz_extractf32x4_ps<const IMM8: i32>(k: __mmask8, a: __m256) -> __m128 {
23607 static_assert_uimm_bits!(IMM8, 1);
23608 let r: __m128 = _mm256_extractf32x4_ps::<IMM8>(a);
23609 let zero: f32x4 = _mm_setzero_ps().as_f32x4();
    transmute(simd_select_bitmask(k, r.as_f32x4(), zero))
23611}
23612
23613/// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with IMM1, and store the result in dst.
23614///
23615/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extracti64x4_epi64&expand=2473)
23616#[inline]
23617#[target_feature(enable = "avx512f")]
23618#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23619#[cfg_attr(
23620 all(test, not(target_os = "windows")),
23621 assert_instr(vextractf64x4, IMM1 = 1) //should be vextracti64x4
23622)]
23623#[rustc_legacy_const_generics(1)]
23624pub unsafe fn _mm512_extracti64x4_epi64<const IMM1: i32>(a: __m512i) -> __m256i {
23625 static_assert_uimm_bits!(IMM1, 1);
23626 match IMM1 {
23627 0 => simd_shuffle!(a, _mm512_set1_epi64(0), [0, 1, 2, 3]),
23628 _ => simd_shuffle!(a, _mm512_set1_epi64(0), [4, 5, 6, 7]),
23629 }
23630}
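
// Usage sketch (comment only; assumed input): IMM1 selects the lower (0) or
// upper (1) 256-bit half of `a`.
//
//     let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
//     let upper = _mm512_extracti64x4_epi64::<1>(a); // (4, 5, 6, 7)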
23631
23632/// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with IMM1, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23633///
23634/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extracti64x4_epi64&expand=2474)
23635#[inline]
23636#[target_feature(enable = "avx512f")]
23637#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23638#[cfg_attr(
23639 all(test, not(target_os = "windows")),
23640 assert_instr(vextracti64x4, IMM1 = 1)
23641)]
23642#[rustc_legacy_const_generics(3)]
23643pub unsafe fn _mm512_mask_extracti64x4_epi64<const IMM1: i32>(
23644 src: __m256i,
23645 k: __mmask8,
23646 a: __m512i,
23647) -> __m256i {
23648 static_assert_uimm_bits!(IMM1, 1);
23649 let r: __m256i = _mm512_extracti64x4_epi64::<IMM1>(a);
    transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4()))
23651}
23652
23653/// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with IMM1, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23654///
23655/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extracti64x4_epi64&expand=2475)
23656#[inline]
23657#[target_feature(enable = "avx512f")]
23658#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23659#[cfg_attr(
23660 all(test, not(target_os = "windows")),
23661 assert_instr(vextracti64x4, IMM1 = 1)
23662)]
23663#[rustc_legacy_const_generics(2)]
23664pub unsafe fn _mm512_maskz_extracti64x4_epi64<const IMM1: i32>(k: __mmask8, a: __m512i) -> __m256i {
23665 static_assert_uimm_bits!(IMM1, 1);
23666 let r: __m256i = _mm512_extracti64x4_epi64::<IMM1>(a);
23667 let zero: i64x4 = _mm256_setzero_si256().as_i64x4();
    transmute(simd_select_bitmask(k, r.as_i64x4(), zero))
23669}
23670
23671/// Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from a, selected with imm8, and store the result in dst.
23672///
23673/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extractf64x4_pd&expand=2454)
23674#[inline]
23675#[target_feature(enable = "avx512f")]
23676#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23677#[cfg_attr(
23678 all(test, not(target_os = "windows")),
23679 assert_instr(vextractf64x4, IMM8 = 1)
23680)]
23681#[rustc_legacy_const_generics(1)]
23682pub unsafe fn _mm512_extractf64x4_pd<const IMM8: i32>(a: __m512d) -> __m256d {
23683 static_assert_uimm_bits!(IMM8, 1);
23684 match IMM8 & 0x1 {
23685 0 => simd_shuffle!(a, _mm512_undefined_pd(), [0, 1, 2, 3]),
23686 _ => simd_shuffle!(a, _mm512_undefined_pd(), [4, 5, 6, 7]),
23687 }
23688}
23689
23690/// Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from a, selected with imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23691///
23692/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extractf64x4_pd&expand=2455)
23693#[inline]
23694#[target_feature(enable = "avx512f")]
23695#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23696#[cfg_attr(
23697 all(test, not(target_os = "windows")),
23698 assert_instr(vextractf64x4, IMM8 = 1)
23699)]
23700#[rustc_legacy_const_generics(3)]
23701pub unsafe fn _mm512_mask_extractf64x4_pd<const IMM8: i32>(
23702 src: __m256d,
23703 k: __mmask8,
23704 a: __m512d,
23705) -> __m256d {
23706 static_assert_uimm_bits!(IMM8, 1);
23707 let r: __m256d = _mm512_extractf64x4_pd::<IMM8>(a);
    transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
23709}
23710
23711/// Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23712///
23713/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extractf64x4_pd&expand=2456)
23714#[inline]
23715#[target_feature(enable = "avx512f")]
23716#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23717#[cfg_attr(
23718 all(test, not(target_os = "windows")),
23719 assert_instr(vextractf64x4, IMM8 = 1)
23720)]
23721#[rustc_legacy_const_generics(2)]
23722pub unsafe fn _mm512_maskz_extractf64x4_pd<const IMM8: i32>(k: __mmask8, a: __m512d) -> __m256d {
23723 static_assert_uimm_bits!(IMM8, 1);
23724 let r: __m256d = _mm512_extractf64x4_pd::<IMM8>(a);
23725 let zero: f64x4 = _mm256_setzero_pd().as_f64x4();
    transmute(simd_select_bitmask(k, r.as_f64x4(), zero))
23727}
23728
23729/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM2, and store the result in dst.
23730///
23731/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extracti32x4_epi32&expand=2461)
23732#[inline]
23733#[target_feature(enable = "avx512f")]
23734#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23735#[cfg_attr(
23736 all(test, not(target_os = "windows")),
23737 assert_instr(vextractf32x4, IMM2 = 3) //should be vextracti32x4
23738)]
23739#[rustc_legacy_const_generics(1)]
23740pub unsafe fn _mm512_extracti32x4_epi32<const IMM2: i32>(a: __m512i) -> __m128i {
23741 static_assert_uimm_bits!(IMM2, 2);
23742 let a: i32x16 = a.as_i32x16();
23743 let undefined: i32x16 = _mm512_undefined_epi32().as_i32x16();
23744 let extract: i32x4 = match IMM2 {
23745 0 => simd_shuffle!(a, undefined, [0, 1, 2, 3]),
23746 1 => simd_shuffle!(a, undefined, [4, 5, 6, 7]),
23747 2 => simd_shuffle!(a, undefined, [8, 9, 10, 11]),
23748 _ => simd_shuffle!(a, undefined, [12, 13, 14, 15]),
23749 };
    transmute(extract)
23751}
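
// Usage sketch (comment only; assumed input): IMM2 picks one of the four
// 128-bit lanes of `a`, returned as a `__m128i` of four 32-bit integers.
//
//     let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
//     let lane2 = _mm512_extracti32x4_epi32::<2>(a); // (8, 9, 10, 11)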
23752
23753/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM2, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23754///
23755/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extracti32x4_epi32&expand=2462)
23756#[inline]
23757#[target_feature(enable = "avx512f")]
23758#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23759#[cfg_attr(
23760 all(test, not(target_os = "windows")),
23761 assert_instr(vextracti32x4, IMM2 = 3)
23762)]
23763#[rustc_legacy_const_generics(3)]
23764pub unsafe fn _mm512_mask_extracti32x4_epi32<const IMM2: i32>(
23765 src: __m128i,
23766 k: __mmask8,
23767 a: __m512i,
23768) -> __m128i {
23769 static_assert_uimm_bits!(IMM2, 2);
23770 let r: __m128i = _mm512_extracti32x4_epi32::<IMM2>(a);
    transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4()))
23772}
23773
23774/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM2, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23775///
23776/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extracti32x4_epi32&expand=2463)
23777#[inline]
23778#[target_feature(enable = "avx512f")]
23779#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23780#[cfg_attr(
23781 all(test, not(target_os = "windows")),
23782 assert_instr(vextracti32x4, IMM2 = 3)
23783)]
23784#[rustc_legacy_const_generics(2)]
23785pub unsafe fn _mm512_maskz_extracti32x4_epi32<const IMM2: i32>(k: __mmask8, a: __m512i) -> __m128i {
23786 static_assert_uimm_bits!(IMM2, 2);
23787 let r: __m128i = _mm512_extracti32x4_epi32::<IMM2>(a);
23788 let zero: i32x4 = _mm_setzero_si128().as_i32x4();
    transmute(simd_select_bitmask(k, r.as_i32x4(), zero))
23790}
23791
23792/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM1, and store the result in dst.
23793///
23794/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extracti32x4_epi32&expand=2458)
23795#[inline]
23796#[target_feature(enable = "avx512f,avx512vl")]
23797#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23798#[cfg_attr(
23799 all(test, not(target_os = "windows")),
23800 assert_instr(vextract, IMM1 = 1) //should be vextracti32x4
23801)]
23802#[rustc_legacy_const_generics(1)]
23803pub unsafe fn _mm256_extracti32x4_epi32<const IMM1: i32>(a: __m256i) -> __m128i {
23804 static_assert_uimm_bits!(IMM1, 1);
23805 let a: i32x8 = a.as_i32x8();
23806 let undefined: i32x8 = _mm256_undefined_si256().as_i32x8();
23807 let extract: i32x4 = match IMM1 {
23808 0 => simd_shuffle!(a, undefined, [0, 1, 2, 3]),
23809 _ => simd_shuffle!(a, undefined, [4, 5, 6, 7]),
23810 };
    transmute(extract)
23812}
23813
23814/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM1, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23815///
23816/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_extracti32x4_epi32&expand=2459)
23817#[inline]
23818#[target_feature(enable = "avx512f,avx512vl")]
23819#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23820#[cfg_attr(
23821 all(test, not(target_os = "windows")),
23822 assert_instr(vextracti32x4, IMM1 = 1)
23823)]
23824#[rustc_legacy_const_generics(3)]
23825pub unsafe fn _mm256_mask_extracti32x4_epi32<const IMM1: i32>(
23826 src: __m128i,
23827 k: __mmask8,
23828 a: __m256i,
23829) -> __m128i {
23830 static_assert_uimm_bits!(IMM1, 1);
23831 let r: __m128i = _mm256_extracti32x4_epi32::<IMM1>(a);
    transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4()))
23833}
23834
23835/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM1, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23836///
23837/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_extracti32x4_epi32&expand=2460)
23838#[inline]
23839#[target_feature(enable = "avx512f,avx512vl")]
23840#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23841#[cfg_attr(
23842 all(test, not(target_os = "windows")),
23843 assert_instr(vextracti32x4, IMM1 = 1)
23844)]
23845#[rustc_legacy_const_generics(2)]
23846pub unsafe fn _mm256_maskz_extracti32x4_epi32<const IMM1: i32>(k: __mmask8, a: __m256i) -> __m128i {
23847 static_assert_uimm_bits!(IMM1, 1);
23848 let r: __m128i = _mm256_extracti32x4_epi32::<IMM1>(a);
23849 let zero: i32x4 = _mm_setzero_si128().as_i32x4();
    transmute(simd_select_bitmask(k, r.as_i32x4(), zero))
23851}
23852
23853/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst.
23854///
23855/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_moveldup_ps&expand=3862)
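///
/// # Example
///
/// An illustrative sketch (not part of the original documentation, not run as a
/// doc-test); it assumes AVX-512F support at runtime.
///
/// ```ignore
/// let a = _mm512_setr_ps(
///     0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
/// );
/// let r = _mm512_moveldup_ps(a);
/// // Each even-indexed element is duplicated into the following odd slot:
/// // r == [0., 0., 2., 2., 4., 4., 6., 6., 8., 8., 10., 10., 12., 12., 14., 14.]
/// ```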
23856#[inline]
23857#[target_feature(enable = "avx512f")]
23858#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23859#[cfg_attr(test, assert_instr(vmovsldup))]
23860pub unsafe fn _mm512_moveldup_ps(a: __m512) -> __m512 {
23861 let r: f32x16 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
    transmute(r)
23863}
23864
23865/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23866///
23867/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_moveldup_ps&expand=3860)
23868#[inline]
23869#[target_feature(enable = "avx512f")]
23870#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23871#[cfg_attr(test, assert_instr(vmovsldup))]
23872pub unsafe fn _mm512_mask_moveldup_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
23873 let mov: f32x16 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
    transmute(simd_select_bitmask(k, mov, src.as_f32x16()))
23875}
23876
23877/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23878///
23879/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_moveldup_ps&expand=3861)
23880#[inline]
23881#[target_feature(enable = "avx512f")]
23882#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23883#[cfg_attr(test, assert_instr(vmovsldup))]
23884pub unsafe fn _mm512_maskz_moveldup_ps(k: __mmask16, a: __m512) -> __m512 {
23885 let mov: f32x16 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
23886 let zero: f32x16 = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, mov, zero))
23888}
23889
23890/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23891///
23892/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_moveldup_ps&expand=3857)
23893#[inline]
23894#[target_feature(enable = "avx512f,avx512vl")]
23895#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23896#[cfg_attr(test, assert_instr(vmovsldup))]
23897pub unsafe fn _mm256_mask_moveldup_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
23898 let mov: __m256 = _mm256_moveldup_ps(a);
    transmute(simd_select_bitmask(k, mov.as_f32x8(), src.as_f32x8()))
23900}
23901
23902/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23903///
23904/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_moveldup_ps&expand=3858)
23905#[inline]
23906#[target_feature(enable = "avx512f,avx512vl")]
23907#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23908#[cfg_attr(test, assert_instr(vmovsldup))]
23909pub unsafe fn _mm256_maskz_moveldup_ps(k: __mmask8, a: __m256) -> __m256 {
23910 let mov: __m256 = _mm256_moveldup_ps(a);
23911 let zero: f32x8 = _mm256_setzero_ps().as_f32x8();
    transmute(simd_select_bitmask(k, mov.as_f32x8(), zero))
23913}
23914
23915/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23916///
23917/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_moveldup_ps&expand=3854)
23918#[inline]
23919#[target_feature(enable = "avx512f,avx512vl")]
23920#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23921#[cfg_attr(test, assert_instr(vmovsldup))]
23922pub unsafe fn _mm_mask_moveldup_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
23923 let mov: __m128 = _mm_moveldup_ps(a);
    transmute(simd_select_bitmask(k, mov.as_f32x4(), src.as_f32x4()))
23925}
23926
23927/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23928///
23929/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_moveldup_ps&expand=3855)
23930#[inline]
23931#[target_feature(enable = "avx512f,avx512vl")]
23932#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23933#[cfg_attr(test, assert_instr(vmovsldup))]
23934pub unsafe fn _mm_maskz_moveldup_ps(k: __mmask8, a: __m128) -> __m128 {
23935 let mov: __m128 = _mm_moveldup_ps(a);
23936 let zero: f32x4 = _mm_setzero_ps().as_f32x4();
    transmute(simd_select_bitmask(k, mov.as_f32x4(), zero))
23938}
23939
23940/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst.
23941///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movehdup_ps&expand=3852)
23943#[inline]
23944#[target_feature(enable = "avx512f")]
23945#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23946#[cfg_attr(test, assert_instr(vmovshdup))]
23947pub unsafe fn _mm512_movehdup_ps(a: __m512) -> __m512 {
23948 let r: f32x16 = simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
    transmute(r)
23950}
23951
23952/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23953///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_movehdup_ps&expand=3850)
23955#[inline]
23956#[target_feature(enable = "avx512f")]
23957#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23958#[cfg_attr(test, assert_instr(vmovshdup))]
23959pub unsafe fn _mm512_mask_movehdup_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
23960 let mov: f32x16 = simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
    transmute(simd_select_bitmask(k, mov, src.as_f32x16()))
23962}
23963
23964/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23965///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_movehdup_ps&expand=3851)
23967#[inline]
23968#[target_feature(enable = "avx512f")]
23969#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23970#[cfg_attr(test, assert_instr(vmovshdup))]
23971pub unsafe fn _mm512_maskz_movehdup_ps(k: __mmask16, a: __m512) -> __m512 {
23972 let mov: f32x16 = simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
23973 let zero: f32x16 = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, mov, zero))
23975}
23976
23977/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23978///
23979/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_movehdup_ps&expand=3847)
23980#[inline]
23981#[target_feature(enable = "avx512f,avx512vl")]
23982#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23983#[cfg_attr(test, assert_instr(vmovshdup))]
23984pub unsafe fn _mm256_mask_movehdup_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
23985 let mov: __m256 = _mm256_movehdup_ps(a);
    transmute(simd_select_bitmask(k, mov.as_f32x8(), src.as_f32x8()))
23987}
23988
23989/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23990///
23991/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_movehdup_ps&expand=3848)
23992#[inline]
23993#[target_feature(enable = "avx512f,avx512vl")]
23994#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23995#[cfg_attr(test, assert_instr(vmovshdup))]
23996pub unsafe fn _mm256_maskz_movehdup_ps(k: __mmask8, a: __m256) -> __m256 {
23997 let mov: __m256 = _mm256_movehdup_ps(a);
23998 let zero: f32x8 = _mm256_setzero_ps().as_f32x8();
    transmute(simd_select_bitmask(k, mov.as_f32x8(), zero))
24000}
24001
24002/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24003///
24004/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_movehdup_ps&expand=3844)
24005#[inline]
24006#[target_feature(enable = "avx512f,avx512vl")]
24007#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24008#[cfg_attr(test, assert_instr(vmovshdup))]
24009pub unsafe fn _mm_mask_movehdup_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
24010 let mov: __m128 = _mm_movehdup_ps(a);
    transmute(simd_select_bitmask(k, mov.as_f32x4(), src.as_f32x4()))
24012}
24013
24014/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24015///
24016/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_movehdup_ps&expand=3845)
24017#[inline]
24018#[target_feature(enable = "avx512f,avx512vl")]
24019#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24020#[cfg_attr(test, assert_instr(vmovshdup))]
24021pub unsafe fn _mm_maskz_movehdup_ps(k: __mmask8, a: __m128) -> __m128 {
24022 let mov: __m128 = _mm_movehdup_ps(a);
24023 let zero: f32x4 = _mm_setzero_ps().as_f32x4();
    transmute(simd_select_bitmask(k, mov.as_f32x4(), zero))
24025}
24026
24027/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst.
24028///
24029/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movedup_pd&expand=3843)
24030#[inline]
24031#[target_feature(enable = "avx512f")]
24032#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24033#[cfg_attr(test, assert_instr(vmovddup))]
24034pub unsafe fn _mm512_movedup_pd(a: __m512d) -> __m512d {
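    // The index pattern duplicates each even-indexed f64 into the following odd
    // slot, matching the per-lane behavior of `vmovddup`.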
24035 let r: f64x8 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
    transmute(r)
24037}
24038
24039/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24040///
24041/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_movedup_pd&expand=3841)
24042#[inline]
24043#[target_feature(enable = "avx512f")]
24044#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24045#[cfg_attr(test, assert_instr(vmovddup))]
24046pub unsafe fn _mm512_mask_movedup_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
24047 let mov: f64x8 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
    transmute(simd_select_bitmask(k, mov, src.as_f64x8()))
24049}
24050
24051/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24052///
24053/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_movedup_pd&expand=3842)
24054#[inline]
24055#[target_feature(enable = "avx512f")]
24056#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24057#[cfg_attr(test, assert_instr(vmovddup))]
24058pub unsafe fn _mm512_maskz_movedup_pd(k: __mmask8, a: __m512d) -> __m512d {
24059 let mov: f64x8 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
24060 let zero: f64x8 = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, mov, zero))
24062}
24063
24064/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24065///
24066/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_movedup_pd&expand=3838)
24067#[inline]
24068#[target_feature(enable = "avx512f,avx512vl")]
24069#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24070#[cfg_attr(test, assert_instr(vmovddup))]
24071pub unsafe fn _mm256_mask_movedup_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
24072 let mov: __m256d = _mm256_movedup_pd(a);
    transmute(simd_select_bitmask(k, mov.as_f64x4(), src.as_f64x4()))
24074}
24075
24076/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24077///
24078/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_movedup_pd&expand=3839)
24079#[inline]
24080#[target_feature(enable = "avx512f,avx512vl")]
24081#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24082#[cfg_attr(test, assert_instr(vmovddup))]
24083pub unsafe fn _mm256_maskz_movedup_pd(k: __mmask8, a: __m256d) -> __m256d {
24084 let mov: __m256d = _mm256_movedup_pd(a);
24085 let zero: f64x4 = _mm256_setzero_pd().as_f64x4();
    transmute(simd_select_bitmask(k, mov.as_f64x4(), zero))
24087}
24088
24089/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24090///
24091/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_movedup_pd&expand=3835)
24092#[inline]
24093#[target_feature(enable = "avx512f,avx512vl")]
24094#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24095#[cfg_attr(test, assert_instr(vmovddup))]
24096pub unsafe fn _mm_mask_movedup_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
24097 let mov: __m128d = _mm_movedup_pd(a);
    transmute(simd_select_bitmask(k, mov.as_f64x2(), src.as_f64x2()))
24099}
24100
24101/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24102///
24103/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_movedup_pd&expand=3836)
24104#[inline]
24105#[target_feature(enable = "avx512f,avx512vl")]
24106#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24107#[cfg_attr(test, assert_instr(vmovddup))]
24108pub unsafe fn _mm_maskz_movedup_pd(k: __mmask8, a: __m128d) -> __m128d {
24109 let mov: __m128d = _mm_movedup_pd(a);
24110 let zero: f64x2 = _mm_setzero_pd().as_f64x2();
    transmute(simd_select_bitmask(k, mov.as_f64x2(), zero))
24112}
24113
24114/// Copy a to dst, then insert 128 bits (composed of 4 packed 32-bit integers) from b into dst at the location specified by imm8.
24115///
24116/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_inserti32x4&expand=3174)
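///
/// # Example
///
/// A minimal sketch, assuming AVX-512F is available; shown for clarity only and
/// not run as a doc-test.
///
/// ```ignore
/// let a = _mm512_set1_epi32(0);
/// let b = _mm_setr_epi32(1, 2, 3, 4);
/// // Replace the second 128-bit lane (elements 4..=7) of `a` with `b`.
/// let r = _mm512_inserti32x4::<1>(a, b);
/// // r == [0, 0, 0, 0, 1, 2, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0]
/// ```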
24117#[inline]
24118#[target_feature(enable = "avx512f")]
24119#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24120#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))] //should be vinserti32x4
24121#[rustc_legacy_const_generics(2)]
24122pub unsafe fn _mm512_inserti32x4<const IMM8: i32>(a: __m512i, b: __m128i) -> __m512i {
24123 static_assert_uimm_bits!(IMM8, 2);
24124 let a: i32x16 = a.as_i32x16();
24125 let b: i32x16 = _mm512_castsi128_si512(b).as_i32x16();
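    // In the concatenated index space of `simd_shuffle!`, 0..=15 refer to `a` and
    // 16..=31 to `b`; after the cast, `b`'s 128-bit payload sits in indices 16..=19
    // and overwrites the lane selected by IMM8.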
24126 let ret: i32x16 = match IMM8 & 0b11 {
24127 0 => simd_shuffle!(
24128 a,
24129 b,
24130 [16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
24131 ),
24132 1 => simd_shuffle!(
24133 a,
24134 b,
24135 [0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15],
24136 ),
24137 2 => simd_shuffle!(
24138 a,
24139 b,
24140 [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 12, 13, 14, 15],
24141 ),
24142 _ => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19]),
24143 };
    transmute(ret)
24145}
24146
24147/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24148///
24149/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_inserti32x4&expand=3175)
24150#[inline]
24151#[target_feature(enable = "avx512f")]
24152#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24153#[cfg_attr(test, assert_instr(vinserti32x4, IMM8 = 2))]
24154#[rustc_legacy_const_generics(4)]
24155pub unsafe fn _mm512_mask_inserti32x4<const IMM8: i32>(
24156 src: __m512i,
24157 k: __mmask16,
24158 a: __m512i,
24159 b: __m128i,
24160) -> __m512i {
24161 static_assert_uimm_bits!(IMM8, 2);
24162 let r: __m512i = _mm512_inserti32x4::<IMM8>(a, b);
    transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16()))
24164}
24165
24166/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24167///
24168/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_inserti32x4&expand=3176)
24169#[inline]
24170#[target_feature(enable = "avx512f")]
24171#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24172#[cfg_attr(test, assert_instr(vinserti32x4, IMM8 = 2))]
24173#[rustc_legacy_const_generics(3)]
24174pub unsafe fn _mm512_maskz_inserti32x4<const IMM8: i32>(
24175 k: __mmask16,
24176 a: __m512i,
24177 b: __m128i,
24178) -> __m512i {
24179 static_assert_uimm_bits!(IMM8, 2);
24180 let r: __m512i = _mm512_inserti32x4::<IMM8>(a, b);
24181 let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, r.as_i32x16(), zero))
24183}
24184
24185/// Copy a to dst, then insert 128 bits (composed of 4 packed 32-bit integers) from b into dst at the location specified by imm8.
24186///
24187/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_inserti32x4&expand=3171)
24188#[inline]
24189#[target_feature(enable = "avx512f,avx512vl")]
24190#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24191#[cfg_attr(
24192 all(test, not(target_os = "windows")),
24193 assert_instr(vinsert, IMM8 = 1) //should be vinserti32x4
24194)]
24195#[rustc_legacy_const_generics(2)]
24196pub unsafe fn _mm256_inserti32x4<const IMM8: i32>(a: __m256i, b: __m128i) -> __m256i {
24197 static_assert_uimm_bits!(IMM8, 1);
24198 let a: i32x8 = a.as_i32x8();
24199 let b: i32x8 = _mm256_castsi128_si256(b).as_i32x8();
24200 let ret: i32x8 = match IMM8 & 0b1 {
24201 0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
24202 _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
24203 };
    transmute(ret)
24205}
24206
24207/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24208///
24209/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_inserti32x4&expand=3172)
24210#[inline]
24211#[target_feature(enable = "avx512f,avx512vl")]
24212#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24213#[cfg_attr(
24214 all(test, not(target_os = "windows")),
24215 assert_instr(vinserti32x4, IMM8 = 1)
24216)]
24217#[rustc_legacy_const_generics(4)]
24218pub unsafe fn _mm256_mask_inserti32x4<const IMM8: i32>(
24219 src: __m256i,
24220 k: __mmask8,
24221 a: __m256i,
24222 b: __m128i,
24223) -> __m256i {
24224 static_assert_uimm_bits!(IMM8, 1);
24225 let r: __m256i = _mm256_inserti32x4::<IMM8>(a, b);
    transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8()))
24227}
24228
24229/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24230///
24231/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_inserti32x4&expand=3173)
24232#[inline]
24233#[target_feature(enable = "avx512f,avx512vl")]
24234#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24235#[cfg_attr(
24236 all(test, not(target_os = "windows")),
24237 assert_instr(vinserti32x4, IMM8 = 1)
24238)]
24239#[rustc_legacy_const_generics(3)]
24240pub unsafe fn _mm256_maskz_inserti32x4<const IMM8: i32>(
24241 k: __mmask8,
24242 a: __m256i,
24243 b: __m128i,
24244) -> __m256i {
24245 static_assert_uimm_bits!(IMM8, 1);
24246 let r: __m256i = _mm256_inserti32x4::<IMM8>(a, b);
24247 let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
    transmute(simd_select_bitmask(k, r.as_i32x8(), zero))
24249}
24250
24251/// Copy a to dst, then insert 256 bits (composed of 4 packed 64-bit integers) from b into dst at the location specified by imm8.
24252///
24253/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_inserti64x4&expand=3186)
24254#[inline]
24255#[target_feature(enable = "avx512f")]
24256#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24257#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))] //should be vinserti64x4
24258#[rustc_legacy_const_generics(2)]
24259pub unsafe fn _mm512_inserti64x4<const IMM8: i32>(a: __m512i, b: __m256i) -> __m512i {
24260 static_assert_uimm_bits!(IMM8, 1);
24261 let b: __m512i = _mm512_castsi256_si512(b);
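    // Indices 0..=7 refer to `a` and 8..=15 to `b`; the cast leaves `b`'s 256-bit
    // payload in indices 8..=11, which replace the half selected by IMM8.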
24262 match IMM8 & 0b1 {
24263 0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
24264 _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
24265 }
24266}
24267
24268/// Copy a to tmp, then insert 256 bits (composed of 4 packed 64-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24269///
24270/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_inserti64x4&expand=3187)
24271#[inline]
24272#[target_feature(enable = "avx512f")]
24273#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24274#[cfg_attr(test, assert_instr(vinserti64x4, IMM8 = 1))]
24275#[rustc_legacy_const_generics(4)]
24276pub unsafe fn _mm512_mask_inserti64x4<const IMM8: i32>(
24277 src: __m512i,
24278 k: __mmask8,
24279 a: __m512i,
24280 b: __m256i,
24281) -> __m512i {
24282 static_assert_uimm_bits!(IMM8, 1);
24283 let r: __m512i = _mm512_inserti64x4::<IMM8>(a, b);
    transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8()))
24285}
24286
24287/// Copy a to tmp, then insert 256 bits (composed of 4 packed 64-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24288///
24289/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_inserti64x4&expand=3188)
24290#[inline]
24291#[target_feature(enable = "avx512f")]
24292#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24293#[cfg_attr(test, assert_instr(vinserti64x4, IMM8 = 1))]
24294#[rustc_legacy_const_generics(3)]
24295pub unsafe fn _mm512_maskz_inserti64x4<const IMM8: i32>(
24296 k: __mmask8,
24297 a: __m512i,
24298 b: __m256i,
24299) -> __m512i {
24300 static_assert_uimm_bits!(IMM8, 1);
24301 let r: __m512i = _mm512_inserti64x4::<IMM8>(a, b);
24302 let zero: i64x8 = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, r.as_i64x8(), zero))
24304}
24305
24306/// Copy a to dst, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into dst at the location specified by imm8.
24307///
24308/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_insertf32x4&expand=3155)
24309#[inline]
24310#[target_feature(enable = "avx512f")]
24311#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24312#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))]
24313#[rustc_legacy_const_generics(2)]
24314pub unsafe fn _mm512_insertf32x4<const IMM8: i32>(a: __m512, b: __m128) -> __m512 {
24315 static_assert_uimm_bits!(IMM8, 2);
24316 let b: __m512 = _mm512_castps128_ps512(b);
24317 match IMM8 & 0b11 {
24318 0 => simd_shuffle!(
24319 a,
24320 b,
24321 [16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
24322 ),
24323 1 => simd_shuffle!(
24324 a,
24325 b,
24326 [0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15],
24327 ),
24328 2 => simd_shuffle!(
24329 a,
24330 b,
24331 [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 12, 13, 14, 15],
24332 ),
24333 _ => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19]),
24334 }
24335}
24336
24337/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24338///
24339/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_insertf32x4&expand=3156)
24340#[inline]
24341#[target_feature(enable = "avx512f")]
24342#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24343#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))]
24344#[rustc_legacy_const_generics(4)]
24345pub unsafe fn _mm512_mask_insertf32x4<const IMM8: i32>(
24346 src: __m512,
24347 k: __mmask16,
24348 a: __m512,
24349 b: __m128,
24350) -> __m512 {
24351 static_assert_uimm_bits!(IMM8, 2);
24352 let r: __m512 = _mm512_insertf32x4::<IMM8>(a, b);
    transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16()))
24354}
24355
24356/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24357///
24358/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_insertf32x4&expand=3157)
24359#[inline]
24360#[target_feature(enable = "avx512f")]
24361#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24362#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))]
24363#[rustc_legacy_const_generics(3)]
24364pub unsafe fn _mm512_maskz_insertf32x4<const IMM8: i32>(
24365 k: __mmask16,
24366 a: __m512,
24367 b: __m128,
24368) -> __m512 {
24369 static_assert_uimm_bits!(IMM8, 2);
24370 let r: __m512 = _mm512_insertf32x4::<IMM8>(a, b);
24371 let zero: f32x16 = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, r.as_f32x16(), zero))
24373}
24374
24375/// Copy a to dst, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into dst at the location specified by imm8.
24376///
24377/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_insertf32x4&expand=3152)
24378#[inline]
24379#[target_feature(enable = "avx512f,avx512vl")]
24380#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24381#[cfg_attr(
24382 all(test, not(target_os = "windows")),
24383 assert_instr(vinsert, IMM8 = 1) //should be vinsertf32x4
24384)]
24385#[rustc_legacy_const_generics(2)]
24386pub unsafe fn _mm256_insertf32x4<const IMM8: i32>(a: __m256, b: __m128) -> __m256 {
24387 static_assert_uimm_bits!(IMM8, 1);
24388 let b: __m256 = _mm256_castps128_ps256(b);
24389 match IMM8 & 0b1 {
24390 0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
24391 _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
24392 }
24393}
24394
24395/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24396///
24397/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_insertf32x4&expand=3153)
24398#[inline]
24399#[target_feature(enable = "avx512f,avx512vl")]
24400#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24401#[cfg_attr(
24402 all(test, not(target_os = "windows")),
24403 assert_instr(vinsertf32x4, IMM8 = 1)
24404)]
24405#[rustc_legacy_const_generics(4)]
24406pub unsafe fn _mm256_mask_insertf32x4<const IMM8: i32>(
24407 src: __m256,
24408 k: __mmask8,
24409 a: __m256,
24410 b: __m128,
24411) -> __m256 {
24412 static_assert_uimm_bits!(IMM8, 1);
24413 let r: __m256 = _mm256_insertf32x4::<IMM8>(a, b);
    transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8()))
24415}
24416
24417/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24418///
24419/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_insertf32x4&expand=3154)
24420#[inline]
24421#[target_feature(enable = "avx512f,avx512vl")]
24422#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24423#[cfg_attr(
24424 all(test, not(target_os = "windows")),
24425 assert_instr(vinsertf32x4, IMM8 = 1)
24426)]
24427#[rustc_legacy_const_generics(3)]
24428pub unsafe fn _mm256_maskz_insertf32x4<const IMM8: i32>(
24429 k: __mmask8,
24430 a: __m256,
24431 b: __m128,
24432) -> __m256 {
24433 static_assert_uimm_bits!(IMM8, 1);
24434 let r: __m256 = _mm256_insertf32x4::<IMM8>(a, b);
24435 let zero: f32x8 = _mm256_setzero_ps().as_f32x8();
    transmute(simd_select_bitmask(k, r.as_f32x8(), zero))
24437}
24438
24439/// Copy a to dst, then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from b into dst at the location specified by imm8.
24440///
24441/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_insertf64x4&expand=3167)
24442#[inline]
24443#[target_feature(enable = "avx512f")]
24444#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24445#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))]
24446#[rustc_legacy_const_generics(2)]
24447pub unsafe fn _mm512_insertf64x4<const IMM8: i32>(a: __m512d, b: __m256d) -> __m512d {
24448 static_assert_uimm_bits!(IMM8, 1);
24449 let b: __m512d = _mm512_castpd256_pd512(b);
24450 match IMM8 & 0b1 {
24451 0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
24452 _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
24453 }
24454}
24455
24456/// Copy a to tmp, then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24457///
24458/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_insertf64x4&expand=3168)
24459#[inline]
24460#[target_feature(enable = "avx512f")]
24461#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24462#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))]
24463#[rustc_legacy_const_generics(4)]
24464pub unsafe fn _mm512_mask_insertf64x4<const IMM8: i32>(
24465 src: __m512d,
24466 k: __mmask8,
24467 a: __m512d,
24468 b: __m256d,
24469) -> __m512d {
24470 static_assert_uimm_bits!(IMM8, 1);
24471 let r: __m512d = _mm512_insertf64x4::<IMM8>(a, b);
    transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
24473}
24474
24475/// Copy a to tmp, then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24476///
24477/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_insertf64x4&expand=3169)
24478#[inline]
24479#[target_feature(enable = "avx512f")]
24480#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24481#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))]
24482#[rustc_legacy_const_generics(3)]
24483pub unsafe fn _mm512_maskz_insertf64x4<const IMM8: i32>(
24484 k: __mmask8,
24485 a: __m512d,
24486 b: __m256d,
24487) -> __m512d {
24488 static_assert_uimm_bits!(IMM8, 1);
24489 let r: __m512d = _mm512_insertf64x4::<IMM8>(a, b);
24490 let zero: f64x8 = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, r.as_f64x8(), zero))
24492}
24493
24494/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst.
24495///
24496/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_epi32&expand=6021)
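///
/// # Example
///
/// An illustrative sketch (not from the original documentation, not run as a
/// doc-test); it assumes AVX-512F support.
///
/// ```ignore
/// let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
/// let b = _mm512_set1_epi32(-1);
/// let r = _mm512_unpackhi_epi32(a, b);
/// // The first 128-bit lane of r is [2, -1, 3, -1]; the pattern repeats per lane.
/// ```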
24497#[inline]
24498#[target_feature(enable = "avx512f")]
24499#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24500#[cfg_attr(test, assert_instr(vunpckhps))] //should be vpunpckhdq
24501pub unsafe fn _mm512_unpackhi_epi32(a: __m512i, b: __m512i) -> __m512i {
24502 let a: i32x16 = a.as_i32x16();
24503 let b: i32x16 = b.as_i32x16();
24504 #[rustfmt::skip]
24505 let r: i32x16 = simd_shuffle!(
24506 a, b,
24507 [ 2, 18, 3, 19,
24508 2 + 4, 18 + 4, 3 + 4, 19 + 4,
24509 2 + 8, 18 + 8, 3 + 8, 19 + 8,
24510 2 + 12, 18 + 12, 3 + 12, 19 + 12],
24511 );
    transmute(r)
24513}
24514
24515/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24516///
24517/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_epi32&expand=6019)
24518#[inline]
24519#[target_feature(enable = "avx512f")]
24520#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24521#[cfg_attr(test, assert_instr(vpunpckhdq))]
24522pub unsafe fn _mm512_mask_unpackhi_epi32(
24523 src: __m512i,
24524 k: __mmask16,
24525 a: __m512i,
24526 b: __m512i,
24527) -> __m512i {
24528 let unpackhi: i32x16 = _mm512_unpackhi_epi32(a, b).as_i32x16();
    transmute(simd_select_bitmask(k, unpackhi, src.as_i32x16()))
24530}
24531
24532/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24533///
24534/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_epi32&expand=6020)
24535#[inline]
24536#[target_feature(enable = "avx512f")]
24537#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24538#[cfg_attr(test, assert_instr(vpunpckhdq))]
24539pub unsafe fn _mm512_maskz_unpackhi_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
24540 let unpackhi: i32x16 = _mm512_unpackhi_epi32(a, b).as_i32x16();
24541 let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, unpackhi, zero))
24543}
24544
24545/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24546///
24547/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_epi32&expand=6016)
24548#[inline]
24549#[target_feature(enable = "avx512f,avx512vl")]
24550#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24551#[cfg_attr(test, assert_instr(vpunpckhdq))]
24552pub unsafe fn _mm256_mask_unpackhi_epi32(
24553 src: __m256i,
24554 k: __mmask8,
24555 a: __m256i,
24556 b: __m256i,
24557) -> __m256i {
24558 let unpackhi: i32x8 = _mm256_unpackhi_epi32(a, b).as_i32x8();
    transmute(simd_select_bitmask(k, unpackhi, src.as_i32x8()))
24560}
24561
24562/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24563///
24564/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_epi32&expand=6017)
24565#[inline]
24566#[target_feature(enable = "avx512f,avx512vl")]
24567#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24568#[cfg_attr(test, assert_instr(vpunpckhdq))]
24569pub unsafe fn _mm256_maskz_unpackhi_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
24570 let unpackhi: i32x8 = _mm256_unpackhi_epi32(a, b).as_i32x8();
24571 let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
    transmute(simd_select_bitmask(k, unpackhi, zero))
24573}
24574
24575/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24576///
24577/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_epi32&expand=6013)
24578#[inline]
24579#[target_feature(enable = "avx512f,avx512vl")]
24580#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24581#[cfg_attr(test, assert_instr(vpunpckhdq))]
24582pub unsafe fn _mm_mask_unpackhi_epi32(
24583 src: __m128i,
24584 k: __mmask8,
24585 a: __m128i,
24586 b: __m128i,
24587) -> __m128i {
24588 let unpackhi: i32x4 = _mm_unpackhi_epi32(a, b).as_i32x4();
    transmute(simd_select_bitmask(k, unpackhi, src.as_i32x4()))
24590}
24591
24592/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24593///
24594/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_epi32&expand=6014)
24595#[inline]
24596#[target_feature(enable = "avx512f,avx512vl")]
24597#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24598#[cfg_attr(test, assert_instr(vpunpckhdq))]
24599pub unsafe fn _mm_maskz_unpackhi_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
24600 let unpackhi: i32x4 = _mm_unpackhi_epi32(a, b).as_i32x4();
24601 let zero: i32x4 = _mm_setzero_si128().as_i32x4();
    transmute(simd_select_bitmask(k, unpackhi, zero))
24603}
24604
24605/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst.
24606///
24607/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_epi64&expand=6030)
24608#[inline]
24609#[target_feature(enable = "avx512f")]
24610#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24611#[cfg_attr(test, assert_instr(vunpckhpd))] //should be vpunpckhqdq
24612pub unsafe fn _mm512_unpackhi_epi64(a: __m512i, b: __m512i) -> __m512i {
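    // `a` occupies indices 0..=7 and `b` indices 8..=15; each 128-bit lane takes the
    // high 64-bit element of `a` followed by the high 64-bit element of `b`.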
24613 simd_shuffle!(a, b, [1, 9, 1 + 2, 9 + 2, 1 + 4, 9 + 4, 1 + 6, 9 + 6])
24614}
24615
24616/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24617///
24618/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_epi64&expand=6028)
24619#[inline]
24620#[target_feature(enable = "avx512f")]
24621#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24622#[cfg_attr(test, assert_instr(vpunpckhqdq))]
24623pub unsafe fn _mm512_mask_unpackhi_epi64(
24624 src: __m512i,
24625 k: __mmask8,
24626 a: __m512i,
24627 b: __m512i,
24628) -> __m512i {
24629 let unpackhi: i64x8 = _mm512_unpackhi_epi64(a, b).as_i64x8();
    transmute(simd_select_bitmask(k, unpackhi, src.as_i64x8()))
24631}
24632
24633/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24634///
24635/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_epi64&expand=6029)
24636#[inline]
24637#[target_feature(enable = "avx512f")]
24638#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24639#[cfg_attr(test, assert_instr(vpunpckhqdq))]
24640pub unsafe fn _mm512_maskz_unpackhi_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
24641 let unpackhi: i64x8 = _mm512_unpackhi_epi64(a, b).as_i64x8();
24642 let zero: i64x8 = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, unpackhi, zero))
24644}
24645
24646/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24647///
24648/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_epi64&expand=6025)
24649#[inline]
24650#[target_feature(enable = "avx512f,avx512vl")]
24651#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24652#[cfg_attr(test, assert_instr(vpunpckhqdq))]
24653pub unsafe fn _mm256_mask_unpackhi_epi64(
24654 src: __m256i,
24655 k: __mmask8,
24656 a: __m256i,
24657 b: __m256i,
24658) -> __m256i {
24659 let unpackhi: i64x4 = _mm256_unpackhi_epi64(a, b).as_i64x4();
    transmute(simd_select_bitmask(k, unpackhi, src.as_i64x4()))
24661}
24662
24663/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24664///
24665/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_epi64&expand=6026)
24666#[inline]
24667#[target_feature(enable = "avx512f,avx512vl")]
24668#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24669#[cfg_attr(test, assert_instr(vpunpckhqdq))]
24670pub unsafe fn _mm256_maskz_unpackhi_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
24671 let unpackhi: i64x4 = _mm256_unpackhi_epi64(a, b).as_i64x4();
24672 let zero: i64x4 = _mm256_setzero_si256().as_i64x4();
    transmute(simd_select_bitmask(k, unpackhi, zero))
24674}
24675
24676/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24677///
24678/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_epi64&expand=6022)
24679#[inline]
24680#[target_feature(enable = "avx512f,avx512vl")]
24681#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24682#[cfg_attr(test, assert_instr(vpunpckhqdq))]
24683pub unsafe fn _mm_mask_unpackhi_epi64(
24684 src: __m128i,
24685 k: __mmask8,
24686 a: __m128i,
24687 b: __m128i,
24688) -> __m128i {
24689 let unpackhi: i64x2 = _mm_unpackhi_epi64(a, b).as_i64x2();
    transmute(simd_select_bitmask(k, unpackhi, src.as_i64x2()))
24691}
24692
24693/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24694///
24695/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_epi64&expand=6023)
24696#[inline]
24697#[target_feature(enable = "avx512f,avx512vl")]
24698#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24699#[cfg_attr(test, assert_instr(vpunpckhqdq))]
24700pub unsafe fn _mm_maskz_unpackhi_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
24701 let unpackhi: i64x2 = _mm_unpackhi_epi64(a, b).as_i64x2();
24702 let zero: i64x2 = _mm_setzero_si128().as_i64x2();
    transmute(simd_select_bitmask(k, unpackhi, zero))
24704}
24705
24706/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst.
24707///
24708/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_ps&expand=6060)
24709#[inline]
24710#[target_feature(enable = "avx512f")]
24711#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24712#[cfg_attr(test, assert_instr(vunpckhps))]
24713pub unsafe fn _mm512_unpackhi_ps(a: __m512, b: __m512) -> __m512 {
24714 #[rustfmt::skip]
24715 simd_shuffle!(
24716 a, b,
24717 [ 2, 18, 3, 19,
24718 2 + 4, 18 + 4, 3 + 4, 19 + 4,
24719 2 + 8, 18 + 8, 3 + 8, 19 + 8,
24720 2 + 12, 18 + 12, 3 + 12, 19 + 12],
24721 )
24722}
24723
24724/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24725///
24726/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_ps&expand=6058)
24727#[inline]
24728#[target_feature(enable = "avx512f")]
24729#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24730#[cfg_attr(test, assert_instr(vunpckhps))]
24731pub unsafe fn _mm512_mask_unpackhi_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
24732 let unpackhi: f32x16 = _mm512_unpackhi_ps(a, b).as_f32x16();
    transmute(simd_select_bitmask(k, unpackhi, src.as_f32x16()))
24734}
24735
24736/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24737///
24738/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_ps&expand=6059)
24739#[inline]
24740#[target_feature(enable = "avx512f")]
24741#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24742#[cfg_attr(test, assert_instr(vunpckhps))]
24743pub unsafe fn _mm512_maskz_unpackhi_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
24744 let unpackhi: f32x16 = _mm512_unpackhi_ps(a, b).as_f32x16();
24745 let zero: f32x16 = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, unpackhi, zero))
24747}
24748
24749/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24750///
24751/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_ps&expand=6055)
24752#[inline]
24753#[target_feature(enable = "avx512f,avx512vl")]
24754#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24755#[cfg_attr(test, assert_instr(vunpckhps))]
24756pub unsafe fn _mm256_mask_unpackhi_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
24757 let unpackhi: f32x8 = _mm256_unpackhi_ps(a, b).as_f32x8();
    transmute(simd_select_bitmask(k, unpackhi, src.as_f32x8()))
24759}
24760
24761/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24762///
24763/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_ps&expand=6056)
24764#[inline]
24765#[target_feature(enable = "avx512f,avx512vl")]
24766#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24767#[cfg_attr(test, assert_instr(vunpckhps))]
24768pub unsafe fn _mm256_maskz_unpackhi_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
24769 let unpackhi: f32x8 = _mm256_unpackhi_ps(a, b).as_f32x8();
24770 let zero: f32x8 = _mm256_setzero_ps().as_f32x8();
    transmute(simd_select_bitmask(k, unpackhi, zero))
24772}
24773
24774/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24775///
24776/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_ps&expand=6052)
24777#[inline]
24778#[target_feature(enable = "avx512f,avx512vl")]
24779#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24780#[cfg_attr(test, assert_instr(vunpckhps))]
24781pub unsafe fn _mm_mask_unpackhi_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
24782 let unpackhi: f32x4 = _mm_unpackhi_ps(a, b).as_f32x4();
    transmute(simd_select_bitmask(k, unpackhi, src.as_f32x4()))
24784}
24785
24786/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24787///
24788/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_ps&expand=6053)
24789#[inline]
24790#[target_feature(enable = "avx512f,avx512vl")]
24791#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24792#[cfg_attr(test, assert_instr(vunpckhps))]
24793pub unsafe fn _mm_maskz_unpackhi_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
24794 let unpackhi: f32x4 = _mm_unpackhi_ps(a, b).as_f32x4();
24795 let zero: f32x4 = _mm_setzero_ps().as_f32x4();
    transmute(simd_select_bitmask(k, unpackhi, zero))
24797}
24798
24799/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst.
24800///
24801/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_pd&expand=6048)
24802#[inline]
24803#[target_feature(enable = "avx512f")]
24804#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24805#[cfg_attr(test, assert_instr(vunpckhpd))]
24806pub unsafe fn _mm512_unpackhi_pd(a: __m512d, b: __m512d) -> __m512d {
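    // Same index convention as the integer variant: `a` is 0..=7, `b` is 8..=15, and
    // each 128-bit lane interleaves the high f64 of `a` with the high f64 of `b`.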
24807 simd_shuffle!(a, b, [1, 9, 1 + 2, 9 + 2, 1 + 4, 9 + 4, 1 + 6, 9 + 6])
24808}
24809
24810/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24811///
24812/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_pd&expand=6046)
24813#[inline]
24814#[target_feature(enable = "avx512f")]
24815#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24816#[cfg_attr(test, assert_instr(vunpckhpd))]
24817pub unsafe fn _mm512_mask_unpackhi_pd(
24818 src: __m512d,
24819 k: __mmask8,
24820 a: __m512d,
24821 b: __m512d,
24822) -> __m512d {
24823 let unpackhi: f64x8 = _mm512_unpackhi_pd(a, b).as_f64x8();
    transmute(simd_select_bitmask(k, unpackhi, src.as_f64x8()))
24825}
24826
24827/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24828///
24829/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_pd&expand=6047)
24830#[inline]
24831#[target_feature(enable = "avx512f")]
24832#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24833#[cfg_attr(test, assert_instr(vunpckhpd))]
24834pub unsafe fn _mm512_maskz_unpackhi_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
24835 let unpackhi: f64x8 = _mm512_unpackhi_pd(a, b).as_f64x8();
24836 let zero: f64x8 = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, unpackhi, zero))
24838}
24839
24840/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24841///
24842/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_pd&expand=6043)
24843#[inline]
24844#[target_feature(enable = "avx512f,avx512vl")]
24845#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24846#[cfg_attr(test, assert_instr(vunpckhpd))]
24847pub unsafe fn _mm256_mask_unpackhi_pd(
24848 src: __m256d,
24849 k: __mmask8,
24850 a: __m256d,
24851 b: __m256d,
24852) -> __m256d {
24853 let unpackhi: f64x4 = _mm256_unpackhi_pd(a, b).as_f64x4();
    transmute(simd_select_bitmask(k, unpackhi, src.as_f64x4()))
24855}
24856
24857/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24858///
24859/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_pd&expand=6044)
24860#[inline]
24861#[target_feature(enable = "avx512f,avx512vl")]
24862#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24863#[cfg_attr(test, assert_instr(vunpckhpd))]
24864pub unsafe fn _mm256_maskz_unpackhi_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
24865 let unpackhi: f64x4 = _mm256_unpackhi_pd(a, b).as_f64x4();
24866 let zero: f64x4 = _mm256_setzero_pd().as_f64x4();
    transmute(simd_select_bitmask(k, unpackhi, zero))
24868}
24869
24870/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24871///
24872/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_pd&expand=6040)
24873#[inline]
24874#[target_feature(enable = "avx512f,avx512vl")]
24875#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24876#[cfg_attr(test, assert_instr(vunpckhpd))]
24877pub unsafe fn _mm_mask_unpackhi_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
24878 let unpackhi: f64x2 = _mm_unpackhi_pd(a, b).as_f64x2();
    transmute(simd_select_bitmask(k, unpackhi, src.as_f64x2()))
24880}
24881
24882/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24883///
24884/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_pd&expand=6041)
24885#[inline]
24886#[target_feature(enable = "avx512f,avx512vl")]
24887#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24888#[cfg_attr(test, assert_instr(vunpckhpd))]
24889pub unsafe fn _mm_maskz_unpackhi_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
24890 let unpackhi: f64x2 = _mm_unpackhi_pd(a, b).as_f64x2();
24891 let zero: f64x2 = _mm_setzero_pd().as_f64x2();
    transmute(simd_select_bitmask(k, unpackhi, zero))
24893}
24894
24895/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst.
24896///
24897/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_epi32&expand=6078)
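///
/// An illustrative sketch (ignored doctest; assumes an AVX-512F-capable target
/// and arbitrary example values):
///
/// ```ignore
/// let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
/// let b = _mm512_setr_epi32(16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
/// let r = _mm512_unpacklo_epi32(a, b);
/// // The two low elements of each 128-bit lane of `a` and `b` are interleaved:
/// // r == [0, 16, 1, 17, 4, 20, 5, 21, 8, 24, 9, 25, 12, 28, 13, 29]
/// ```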
24898#[inline]
24899#[target_feature(enable = "avx512f")]
24900#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24901#[cfg_attr(test, assert_instr(vunpcklps))] //should be vpunpckldq
24902pub unsafe fn _mm512_unpacklo_epi32(a: __m512i, b: __m512i) -> __m512i {
24903 let a: i32x16 = a.as_i32x16();
24904 let b: i32x16 = b.as_i32x16();
24905 #[rustfmt::skip]
24906 let r: i32x16 = simd_shuffle!(
24907 a, b,
24908 [ 0, 16, 1, 17,
24909 0 + 4, 16 + 4, 1 + 4, 17 + 4,
24910 0 + 8, 16 + 8, 1 + 8, 17 + 8,
24911 0 + 12, 16 + 12, 1 + 12, 17 + 12],
24912 );
    transmute(r)
24914}
24915
24916/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24917///
24918/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_epi32&expand=6076)
24919#[inline]
24920#[target_feature(enable = "avx512f")]
24921#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24922#[cfg_attr(test, assert_instr(vpunpckldq))]
24923pub unsafe fn _mm512_mask_unpacklo_epi32(
24924 src: __m512i,
24925 k: __mmask16,
24926 a: __m512i,
24927 b: __m512i,
24928) -> __m512i {
24929 let unpacklo: i32x16 = _mm512_unpacklo_epi32(a, b).as_i32x16();
    transmute(simd_select_bitmask(k, unpacklo, src.as_i32x16()))
24931}
24932
24933/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24934///
24935/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_epi32&expand=6077)
24936#[inline]
24937#[target_feature(enable = "avx512f")]
24938#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24939#[cfg_attr(test, assert_instr(vpunpckldq))]
24940pub unsafe fn _mm512_maskz_unpacklo_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
24941 let unpacklo: i32x16 = _mm512_unpacklo_epi32(a, b).as_i32x16();
24942 let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, unpacklo, zero))
24944}
24945
24946/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24947///
24948/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_epi32&expand=6073)
24949#[inline]
24950#[target_feature(enable = "avx512f,avx512vl")]
24951#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24952#[cfg_attr(test, assert_instr(vpunpckldq))]
24953pub unsafe fn _mm256_mask_unpacklo_epi32(
24954 src: __m256i,
24955 k: __mmask8,
24956 a: __m256i,
24957 b: __m256i,
24958) -> __m256i {
24959 let unpacklo: i32x8 = _mm256_unpacklo_epi32(a, b).as_i32x8();
    transmute(simd_select_bitmask(k, unpacklo, src.as_i32x8()))
24961}
24962
24963/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24964///
24965/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_epi32&expand=6074)
24966#[inline]
24967#[target_feature(enable = "avx512f,avx512vl")]
24968#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24969#[cfg_attr(test, assert_instr(vpunpckldq))]
24970pub unsafe fn _mm256_maskz_unpacklo_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
24971 let unpacklo: i32x8 = _mm256_unpacklo_epi32(a, b).as_i32x8();
24972 let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
    transmute(simd_select_bitmask(k, unpacklo, zero))
24974}
24975
24976/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24977///
24978/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_epi32&expand=6070)
24979#[inline]
24980#[target_feature(enable = "avx512f,avx512vl")]
24981#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24982#[cfg_attr(test, assert_instr(vpunpckldq))]
24983pub unsafe fn _mm_mask_unpacklo_epi32(
24984 src: __m128i,
24985 k: __mmask8,
24986 a: __m128i,
24987 b: __m128i,
24988) -> __m128i {
24989 let unpacklo: i32x4 = _mm_unpacklo_epi32(a, b).as_i32x4();
    transmute(simd_select_bitmask(k, unpacklo, src.as_i32x4()))
24991}
24992
24993/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24994///
24995/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_epi32&expand=6071)
24996#[inline]
24997#[target_feature(enable = "avx512f,avx512vl")]
24998#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24999#[cfg_attr(test, assert_instr(vpunpckldq))]
25000pub unsafe fn _mm_maskz_unpacklo_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
25001 let unpacklo: i32x4 = _mm_unpacklo_epi32(a, b).as_i32x4();
25002 let zero: i32x4 = _mm_setzero_si128().as_i32x4();
    transmute(simd_select_bitmask(k, unpacklo, zero))
25004}
25005
25006/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst.
25007///
25008/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_epi64&expand=6087)
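///
/// An illustrative sketch (ignored doctest; assumes an AVX-512F-capable target):
///
/// ```ignore
/// let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
/// let b = _mm512_setr_epi64(10, 11, 12, 13, 14, 15, 16, 17);
/// let r = _mm512_unpacklo_epi64(a, b);
/// // Low element of each 128-bit lane pair: r == [0, 10, 2, 12, 4, 14, 6, 16]
/// ```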
25009#[inline]
25010#[target_feature(enable = "avx512f")]
25011#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25012#[cfg_attr(test, assert_instr(vunpcklpd))] //should be vpunpcklqdq
25013pub unsafe fn _mm512_unpacklo_epi64(a: __m512i, b: __m512i) -> __m512i {
25014 simd_shuffle!(a, b, [0, 8, 0 + 2, 8 + 2, 0 + 4, 8 + 4, 0 + 6, 8 + 6])
25015}
25016
25017/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25018///
25019/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_epi64&expand=6085)
25020#[inline]
25021#[target_feature(enable = "avx512f")]
25022#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25023#[cfg_attr(test, assert_instr(vpunpcklqdq))]
25024pub unsafe fn _mm512_mask_unpacklo_epi64(
25025 src: __m512i,
25026 k: __mmask8,
25027 a: __m512i,
25028 b: __m512i,
25029) -> __m512i {
25030 let unpacklo: i64x8 = _mm512_unpacklo_epi64(a, b).as_i64x8();
    transmute(simd_select_bitmask(k, unpacklo, src.as_i64x8()))
25032}
25033
25034/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25035///
25036/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_epi64&expand=6086)
25037#[inline]
25038#[target_feature(enable = "avx512f")]
25039#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25040#[cfg_attr(test, assert_instr(vpunpcklqdq))]
25041pub unsafe fn _mm512_maskz_unpacklo_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
25042 let unpacklo: i64x8 = _mm512_unpacklo_epi64(a, b).as_i64x8();
25043 let zero: i64x8 = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, unpacklo, zero))
25045}
25046
25047/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25048///
25049/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_epi64&expand=6082)
25050#[inline]
25051#[target_feature(enable = "avx512f,avx512vl")]
25052#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25053#[cfg_attr(test, assert_instr(vpunpcklqdq))]
25054pub unsafe fn _mm256_mask_unpacklo_epi64(
25055 src: __m256i,
25056 k: __mmask8,
25057 a: __m256i,
25058 b: __m256i,
25059) -> __m256i {
25060 let unpacklo: i64x4 = _mm256_unpacklo_epi64(a, b).as_i64x4();
    transmute(simd_select_bitmask(k, unpacklo, src.as_i64x4()))
25062}
25063
25064/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25065///
25066/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_epi64&expand=6083)
25067#[inline]
25068#[target_feature(enable = "avx512f,avx512vl")]
25069#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25070#[cfg_attr(test, assert_instr(vpunpcklqdq))]
25071pub unsafe fn _mm256_maskz_unpacklo_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
25072 let unpacklo: i64x4 = _mm256_unpacklo_epi64(a, b).as_i64x4();
25073 let zero: i64x4 = _mm256_setzero_si256().as_i64x4();
    transmute(simd_select_bitmask(k, unpacklo, zero))
25075}
25076
25077/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25078///
25079/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_epi64&expand=6079)
25080#[inline]
25081#[target_feature(enable = "avx512f,avx512vl")]
25082#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25083#[cfg_attr(test, assert_instr(vpunpcklqdq))]
25084pub unsafe fn _mm_mask_unpacklo_epi64(
25085 src: __m128i,
25086 k: __mmask8,
25087 a: __m128i,
25088 b: __m128i,
25089) -> __m128i {
25090 let unpacklo: i64x2 = _mm_unpacklo_epi64(a, b).as_i64x2();
    transmute(simd_select_bitmask(k, unpacklo, src.as_i64x2()))
25092}
25093
25094/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25095///
25096/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_epi64&expand=6080)
25097#[inline]
25098#[target_feature(enable = "avx512f,avx512vl")]
25099#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25100#[cfg_attr(test, assert_instr(vpunpcklqdq))]
25101pub unsafe fn _mm_maskz_unpacklo_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
25102 let unpacklo: i64x2 = _mm_unpacklo_epi64(a, b).as_i64x2();
25103 let zero: i64x2 = _mm_setzero_si128().as_i64x2();
    transmute(simd_select_bitmask(k, unpacklo, zero))
25105}
25106
25107/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst.
25108///
25109/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_ps&expand=6117)
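///
/// An illustrative sketch (ignored doctest; assumes an AVX-512F-capable target):
///
/// ```ignore
/// let a = _mm512_setr_ps(
///     0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0,
///     8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
/// );
/// let b = _mm512_set1_ps(-1.0);
/// let r = _mm512_unpacklo_ps(a, b);
/// // r == [0.0, -1.0, 1.0, -1.0, 4.0, -1.0, 5.0, -1.0,
/// //       8.0, -1.0, 9.0, -1.0, 12.0, -1.0, 13.0, -1.0]
/// ```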
25110#[inline]
25111#[target_feature(enable = "avx512f")]
25112#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25113#[cfg_attr(test, assert_instr(vunpcklps))]
25114pub unsafe fn _mm512_unpacklo_ps(a: __m512, b: __m512) -> __m512 {
25115 #[rustfmt::skip]
25116 simd_shuffle!(a, b,
25117 [ 0, 16, 1, 17,
25118 0 + 4, 16 + 4, 1 + 4, 17 + 4,
25119 0 + 8, 16 + 8, 1 + 8, 17 + 8,
25120 0 + 12, 16 + 12, 1 + 12, 17 + 12],
25121 )
25122}
25123
25124/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25125///
25126/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_ps&expand=6115)
25127#[inline]
25128#[target_feature(enable = "avx512f")]
25129#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25130#[cfg_attr(test, assert_instr(vunpcklps))]
25131pub unsafe fn _mm512_mask_unpacklo_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
25132 let unpacklo: f32x16 = _mm512_unpacklo_ps(a, b).as_f32x16();
    transmute(simd_select_bitmask(k, unpacklo, src.as_f32x16()))
25134}
25135
25136/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25137///
25138/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_ps&expand=6116)
25139#[inline]
25140#[target_feature(enable = "avx512f")]
25141#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25142#[cfg_attr(test, assert_instr(vunpcklps))]
25143pub unsafe fn _mm512_maskz_unpacklo_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
25144 let unpacklo: f32x16 = _mm512_unpacklo_ps(a, b).as_f32x16();
25145 let zero: f32x16 = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, unpacklo, zero))
25147}
25148
25149/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25150///
25151/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_ps&expand=6112)
25152#[inline]
25153#[target_feature(enable = "avx512f,avx512vl")]
25154#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25155#[cfg_attr(test, assert_instr(vunpcklps))]
25156pub unsafe fn _mm256_mask_unpacklo_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
25157 let unpacklo: f32x8 = _mm256_unpacklo_ps(a, b).as_f32x8();
    transmute(simd_select_bitmask(k, unpacklo, src.as_f32x8()))
25159}
25160
25161/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25162///
25163/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_ps&expand=6113)
25164#[inline]
25165#[target_feature(enable = "avx512f,avx512vl")]
25166#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25167#[cfg_attr(test, assert_instr(vunpcklps))]
25168pub unsafe fn _mm256_maskz_unpacklo_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
25169 let unpacklo: f32x8 = _mm256_unpacklo_ps(a, b).as_f32x8();
25170 let zero: f32x8 = _mm256_setzero_ps().as_f32x8();
    transmute(simd_select_bitmask(k, unpacklo, zero))
25172}
25173
25174/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25175///
25176/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_ps&expand=6109)
25177#[inline]
25178#[target_feature(enable = "avx512f,avx512vl")]
25179#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25180#[cfg_attr(test, assert_instr(vunpcklps))]
25181pub unsafe fn _mm_mask_unpacklo_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
25182 let unpacklo: f32x4 = _mm_unpacklo_ps(a, b).as_f32x4();
    transmute(simd_select_bitmask(k, unpacklo, src.as_f32x4()))
25184}
25185
25186/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25187///
25188/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_ps&expand=6110)
25189#[inline]
25190#[target_feature(enable = "avx512f,avx512vl")]
25191#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25192#[cfg_attr(test, assert_instr(vunpcklps))]
25193pub unsafe fn _mm_maskz_unpacklo_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
25194 let unpacklo: f32x4 = _mm_unpacklo_ps(a, b).as_f32x4();
25195 let zero: f32x4 = _mm_setzero_ps().as_f32x4();
    transmute(simd_select_bitmask(k, unpacklo, zero))
25197}
25198
25199/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst.
25200///
25201/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_pd&expand=6105)
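///
/// An illustrative sketch (ignored doctest; assumes an AVX-512F-capable target):
///
/// ```ignore
/// let a = _mm512_setr_pd(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
/// let b = _mm512_setr_pd(10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0);
/// let r = _mm512_unpacklo_pd(a, b);
/// // r == [0.0, 10.0, 2.0, 12.0, 4.0, 14.0, 6.0, 16.0]
/// ```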
25202#[inline]
25203#[target_feature(enable = "avx512f")]
25204#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25205#[cfg_attr(test, assert_instr(vunpcklpd))]
25206pub unsafe fn _mm512_unpacklo_pd(a: __m512d, b: __m512d) -> __m512d {
25207 simd_shuffle!(a, b, [0, 8, 0 + 2, 8 + 2, 0 + 4, 8 + 4, 0 + 6, 8 + 6])
25208}
25209
25210/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25211///
25212/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_pd&expand=6103)
25213#[inline]
25214#[target_feature(enable = "avx512f")]
25215#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25216#[cfg_attr(test, assert_instr(vunpcklpd))]
25217pub unsafe fn _mm512_mask_unpacklo_pd(
25218 src: __m512d,
25219 k: __mmask8,
25220 a: __m512d,
25221 b: __m512d,
25222) -> __m512d {
25223 let unpacklo: f64x8 = _mm512_unpacklo_pd(a, b).as_f64x8();
    transmute(simd_select_bitmask(k, unpacklo, src.as_f64x8()))
25225}
25226
25227/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25228///
25229/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_pd&expand=6104)
25230#[inline]
25231#[target_feature(enable = "avx512f")]
25232#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25233#[cfg_attr(test, assert_instr(vunpcklpd))]
25234pub unsafe fn _mm512_maskz_unpacklo_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
25235 let unpacklo: f64x8 = _mm512_unpacklo_pd(a, b).as_f64x8();
25236 let zero: f64x8 = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, unpacklo, zero))
25238}
25239
25240/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25241///
25242/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_pd&expand=6100)
25243#[inline]
25244#[target_feature(enable = "avx512f,avx512vl")]
25245#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25246#[cfg_attr(test, assert_instr(vunpcklpd))]
25247pub unsafe fn _mm256_mask_unpacklo_pd(
25248 src: __m256d,
25249 k: __mmask8,
25250 a: __m256d,
25251 b: __m256d,
25252) -> __m256d {
25253 let unpacklo: f64x4 = _mm256_unpacklo_pd(a, b).as_f64x4();
    transmute(simd_select_bitmask(k, unpacklo, src.as_f64x4()))
25255}
25256
25257/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25258///
25259/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_pd&expand=6101)
25260#[inline]
25261#[target_feature(enable = "avx512f,avx512vl")]
25262#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25263#[cfg_attr(test, assert_instr(vunpcklpd))]
25264pub unsafe fn _mm256_maskz_unpacklo_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
25265 let unpacklo: f64x4 = _mm256_unpacklo_pd(a, b).as_f64x4();
25266 let zero: f64x4 = _mm256_setzero_pd().as_f64x4();
    transmute(simd_select_bitmask(k, unpacklo, zero))
25268}
25269
25270/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25271///
25272/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_pd&expand=6097)
25273#[inline]
25274#[target_feature(enable = "avx512f,avx512vl")]
25275#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25276#[cfg_attr(test, assert_instr(vunpcklpd))]
25277pub unsafe fn _mm_mask_unpacklo_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
25278 let unpacklo: f64x2 = _mm_unpacklo_pd(a, b).as_f64x2();
    transmute(simd_select_bitmask(k, unpacklo, src.as_f64x2()))
25280}
25281
25282/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25283///
25284/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_pd&expand=6098)
25285#[inline]
25286#[target_feature(enable = "avx512f,avx512vl")]
25287#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25288#[cfg_attr(test, assert_instr(vunpcklpd))]
25289pub unsafe fn _mm_maskz_unpacklo_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
25290 let unpacklo: f64x2 = _mm_unpacklo_pd(a, b).as_f64x2();
25291 let zero: f64x2 = _mm_setzero_pd().as_f64x2();
    transmute(simd_select_bitmask(k, unpacklo, zero))
25293}
25294
25295/// Cast vector of type __m128 to type __m512; the upper 384 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
25296///
25297/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=512_castps128_ps512&expand=621)
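///
/// An illustrative sketch (ignored doctest; assumes an AVX-512F-capable target).
/// Only the low 128 bits of the result are meaningful; use `_mm512_zextps128_ps512`
/// when the upper elements must be zero:
///
/// ```ignore
/// let a = _mm_set1_ps(1.0);
/// let wide = _mm512_castps128_ps512(a);
/// // The low four elements of `wide` equal `a`; the remaining twelve are undefined.
/// let low = _mm512_castps512_ps128(wide); // low == a
/// ```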
25298#[inline]
25299#[target_feature(enable = "avx512f")]
25300#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25301pub unsafe fn _mm512_castps128_ps512(a: __m128) -> __m512 {
25302 simd_shuffle!(
25303 a,
25304 _mm_set1_ps(-1.),
25305 [0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4],
25306 )
25307}
25308
25309/// Cast vector of type __m256 to type __m512; the upper 256 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
25310///
25311/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=512_castps256_ps512&expand=623)
25312#[inline]
25313#[target_feature(enable = "avx512f")]
25314#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25315pub unsafe fn _mm512_castps256_ps512(a: __m256) -> __m512 {
25316 simd_shuffle!(
25317 a,
25318 _mm256_set1_ps(-1.),
25319 [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
25320 )
25321}
25322
25323/// Cast vector of type __m128 to type __m512; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
25324///
25325/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=512_zextps128_ps512&expand=6196)
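///
/// An illustrative sketch (ignored doctest; assumes an AVX-512F-capable target):
///
/// ```ignore
/// let a = _mm_set1_ps(1.0);
/// let wide = _mm512_zextps128_ps512(a);
/// // wide == [1.0, 1.0, 1.0, 1.0, 0.0, 0.0, ..., 0.0]
/// ```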
25326#[inline]
25327#[target_feature(enable = "avx512f")]
25328#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25329pub unsafe fn _mm512_zextps128_ps512(a: __m128) -> __m512 {
25330 simd_shuffle!(
25331 a,
25332 _mm_set1_ps(0.),
25333 [0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4],
25334 )
25335}
25336
25337/// Cast vector of type __m256 to type __m512; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
25338///
25339/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=512_zextps256_ps512&expand=6197)
25340#[inline]
25341#[target_feature(enable = "avx512f")]
25342#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25343pub unsafe fn _mm512_zextps256_ps512(a: __m256) -> __m512 {
25344 simd_shuffle!(
25345 a,
25346 _mm256_set1_ps(0.),
25347 [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
25348 )
25349}
25350
25351/// Cast vector of type __m512 to type __m128. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
25352///
25353/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=512_castps512_ps128&expand=624)
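///
/// An illustrative sketch (ignored doctest; assumes an AVX-512F-capable target):
///
/// ```ignore
/// let v = _mm512_setr_ps(
///     0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0,
///     8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
/// );
/// let low = _mm512_castps512_ps128(v);
/// // low == [0.0, 1.0, 2.0, 3.0]
/// ```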
25354#[inline]
25355#[target_feature(enable = "avx512f")]
25356#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25357pub unsafe fn _mm512_castps512_ps128(a: __m512) -> __m128 {
25358 simd_shuffle!(a, a, [0, 1, 2, 3])
25359}
25360
25361/// Cast vector of type __m512 to type __m256. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
25362///
25363/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=512_castps512_ps256&expand=625)
25364#[inline]
25365#[target_feature(enable = "avx512f")]
25366#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25367pub unsafe fn _mm512_castps512_ps256(a: __m512) -> __m256 {
25368 simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7])
25369}
25370
25371/// Cast vector of type __m512 to type __m512d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
25372///
25373/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=512_castps_pd&expand=616)
25374#[inline]
25375#[target_feature(enable = "avx512f")]
25376#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25377pub unsafe fn _mm512_castps_pd(a: __m512) -> __m512d {
    transmute(a.as_m512())
25379}
25380
25381/// Cast vector of type __m512 to type __m512i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
25382///
25383/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=512_castps_si512&expand=619)
25384#[inline]
25385#[target_feature(enable = "avx512f")]
25386#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25387pub unsafe fn _mm512_castps_si512(a: __m512) -> __m512i {
    transmute(a.as_m512())
25389}
25390
25391/// Cast vector of type __m128d to type __m512d; the upper 384 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
25392///
25393/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=512_castpd128_pd512&expand=609)
25394#[inline]
25395#[target_feature(enable = "avx512f")]
25396#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25397pub unsafe fn _mm512_castpd128_pd512(a: __m128d) -> __m512d {
25398 simd_shuffle!(a, _mm_set1_pd(-1.), [0, 1, 2, 2, 2, 2, 2, 2])
25399}
25400
25401/// Cast vector of type __m256d to type __m512d; the upper 256 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
25402///
25403/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=512_castpd256_pd512&expand=611)
25404#[inline]
25405#[target_feature(enable = "avx512f")]
25406#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25407pub unsafe fn _mm512_castpd256_pd512(a: __m256d) -> __m512d {
25408 simd_shuffle!(a, _mm256_set1_pd(-1.), [0, 1, 2, 3, 4, 4, 4, 4])
25409}
25410
25411/// Cast vector of type __m128d to type __m512d; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
25412///
25413/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=512_zextpd128_pd512&expand=6193)
25414#[inline]
25415#[target_feature(enable = "avx512f")]
25416#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25417pub unsafe fn _mm512_zextpd128_pd512(a: __m128d) -> __m512d {
25418 simd_shuffle!(a, _mm_set1_pd(0.), [0, 1, 2, 2, 2, 2, 2, 2])
25419}
25420
25421/// Cast vector of type __m256d to type __m512d; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
25422///
25423/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=512_zextpd256_pd512&expand=6194)
25424#[inline]
25425#[target_feature(enable = "avx512f")]
25426#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25427pub unsafe fn _mm512_zextpd256_pd512(a: __m256d) -> __m512d {
25428 simd_shuffle!(a, _mm256_set1_pd(0.), [0, 1, 2, 3, 4, 4, 4, 4])
25429}
25430
25431/// Cast vector of type __m512d to type __m128d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
25432///
25433/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=512_castpd512_pd128&expand=612)
25434#[inline]
25435#[target_feature(enable = "avx512f")]
25436#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25437pub unsafe fn _mm512_castpd512_pd128(a: __m512d) -> __m128d {
25438 simd_shuffle!(a, a, [0, 1])
25439}
25440
25441/// Cast vector of type __m512d to type __m256d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
25442///
25443/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=512_castpd512_pd256&expand=613)
25444#[inline]
25445#[target_feature(enable = "avx512f")]
25446#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25447pub unsafe fn _mm512_castpd512_pd256(a: __m512d) -> __m256d {
25448 simd_shuffle!(a, a, [0, 1, 2, 3])
25449}
25450
25451/// Cast vector of type __m512d to type __m512. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
25452///
25453/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=512_castpd_ps&expand=604)
25454#[inline]
25455#[target_feature(enable = "avx512f")]
25456#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25457pub unsafe fn _mm512_castpd_ps(a: __m512d) -> __m512 {
    transmute(a.as_m512d())
25459}
25460
25461/// Cast vector of type __m512d to type __m512i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
25462///
25463/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=512_castpd_si512&expand=607)
25464#[inline]
25465#[target_feature(enable = "avx512f")]
25466#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25467pub unsafe fn _mm512_castpd_si512(a: __m512d) -> __m512i {
    transmute(a.as_m512d())
25469}
25470
25471/// Cast vector of type __m128i to type __m512i; the upper 384 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
25472///
25473/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=512_castsi128_si512&expand=629)
25474#[inline]
25475#[target_feature(enable = "avx512f")]
25476#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25477pub unsafe fn _mm512_castsi128_si512(a: __m128i) -> __m512i {
25478 simd_shuffle!(a, _mm_set1_epi64x(-1), [0, 1, 2, 2, 2, 2, 2, 2])
25479}
25480
25481/// Cast vector of type __m256i to type __m512i; the upper 256 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
25482///
25483/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=512_castsi256_si512&expand=633)
25484#[inline]
25485#[target_feature(enable = "avx512f")]
25486#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25487pub unsafe fn _mm512_castsi256_si512(a: __m256i) -> __m512i {
25488 simd_shuffle!(a, _mm256_set1_epi64x(-1), [0, 1, 2, 3, 4, 4, 4, 4])
25489}
25490
25491/// Cast vector of type __m128i to type __m512i; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
25492///
25493/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=512_zextsi128_si512&expand=6199)
25494#[inline]
25495#[target_feature(enable = "avx512f")]
25496#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25497pub unsafe fn _mm512_zextsi128_si512(a: __m128i) -> __m512i {
25498 simd_shuffle!(a, _mm_set1_epi64x(0), [0, 1, 2, 2, 2, 2, 2, 2])
25499}
25500
25501/// Cast vector of type __m256i to type __m512i; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
25502///
25503/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=512_zextsi256_si512&expand=6200)
25504#[inline]
25505#[target_feature(enable = "avx512f")]
25506#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25507pub unsafe fn _mm512_zextsi256_si512(a: __m256i) -> __m512i {
25508 simd_shuffle!(a, _mm256_set1_epi64x(0), [0, 1, 2, 3, 4, 4, 4, 4])
25509}
25510
25511/// Cast vector of type __m512i to type __m128i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
25512///
25513/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=512_castsi512_si128&expand=636)
25514#[inline]
25515#[target_feature(enable = "avx512f")]
25516#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25517pub unsafe fn _mm512_castsi512_si128(a: __m512i) -> __m128i {
25518 simd_shuffle!(a, a, [0, 1])
25519}
25520
25521/// Cast vector of type __m512i to type __m256i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
25522///
25523/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=512_castsi512_si256&expand=637)
25524#[inline]
25525#[target_feature(enable = "avx512f")]
25526#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25527pub unsafe fn _mm512_castsi512_si256(a: __m512i) -> __m256i {
25528 simd_shuffle!(a, a, [0, 1, 2, 3])
25529}
25530
25531/// Cast vector of type __m512i to type __m512. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
25532///
25533/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=512_castsi512_ps&expand=635)
25534#[inline]
25535#[target_feature(enable = "avx512f")]
25536#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25537pub unsafe fn _mm512_castsi512_ps(a: __m512i) -> __m512 {
    transmute(a)
25539}
25540
25541/// Cast vector of type __m512i to type __m512d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
25542///
25543/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=512_castsi512_pd&expand=634)
25544#[inline]
25545#[target_feature(enable = "avx512f")]
25546#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25547pub unsafe fn _mm512_castsi512_pd(a: __m512i) -> __m512d {
    transmute(a)
25549}
25550
25551/// Copy the lower 32-bit integer in a to dst.
25552///
25553/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsi512_si32&expand=1882)
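///
/// An illustrative sketch (ignored doctest; assumes an AVX-512F-capable target):
///
/// ```ignore
/// let v = _mm512_setr_epi32(42, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
/// assert_eq!(_mm512_cvtsi512_si32(v), 42);
/// ```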
25554#[inline]
25555#[target_feature(enable = "avx512f")]
25556#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25557#[cfg_attr(all(test, not(target_os = "windows")), assert_instr(vmovd))]
25558pub unsafe fn _mm512_cvtsi512_si32(a: __m512i) -> i32 {
25559 let extract: i32 = simd_extract!(a.as_i32x16(), 0);
25560 extract
25561}
25562
25563/// Broadcast the low packed 32-bit integer from a to all elements of dst.
25564///
25565/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastd_epi32&expand=545)
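///
/// An illustrative sketch (ignored doctest; assumes an AVX-512F-capable target):
///
/// ```ignore
/// let a = _mm_setr_epi32(9, 1, 2, 3);
/// let r = _mm512_broadcastd_epi32(a);
/// // r holds sixteen copies of the low element: [9, 9, 9, ..., 9]
/// ```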
25566#[inline]
25567#[target_feature(enable = "avx512f")]
25568#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25569#[cfg_attr(test, assert_instr(vbroadcast))] //should be vpbroadcastd
25570pub unsafe fn _mm512_broadcastd_epi32(a: __m128i) -> __m512i {
25571 let a: i32x16 = _mm512_castsi128_si512(a).as_i32x16();
25572 let ret: i32x16 = simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
    transmute(ret)
25574}
25575
25576/// Broadcast the low packed 32-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25577///
25578/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastd_epi32&expand=546)
25579#[inline]
25580#[target_feature(enable = "avx512f")]
25581#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25582#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
25583pub unsafe fn _mm512_mask_broadcastd_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
25584 let broadcast: i32x16 = _mm512_broadcastd_epi32(a).as_i32x16();
    transmute(simd_select_bitmask(k, broadcast, src.as_i32x16()))
25586}
25587
25588/// Broadcast the low packed 32-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25589///
25590/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastd_epi32&expand=547)
25591#[inline]
25592#[target_feature(enable = "avx512f")]
25593#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25594#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
25595pub unsafe fn _mm512_maskz_broadcastd_epi32(k: __mmask16, a: __m128i) -> __m512i {
25596 let broadcast: i32x16 = _mm512_broadcastd_epi32(a).as_i32x16();
25597 let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, broadcast, zero))
25599}
25600
25601/// Broadcast the low packed 32-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25602///
25603/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastd_epi32&expand=543)
25604#[inline]
25605#[target_feature(enable = "avx512f,avx512vl")]
25606#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25607#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
25608pub unsafe fn _mm256_mask_broadcastd_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
25609 let broadcast: i32x8 = _mm256_broadcastd_epi32(a).as_i32x8();
    transmute(simd_select_bitmask(k, broadcast, src.as_i32x8()))
25611}
25612
25613/// Broadcast the low packed 32-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25614///
25615/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastd_epi32&expand=544)
25616#[inline]
25617#[target_feature(enable = "avx512f,avx512vl")]
25618#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25619#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
25620pub unsafe fn _mm256_maskz_broadcastd_epi32(k: __mmask8, a: __m128i) -> __m256i {
25621 let broadcast: i32x8 = _mm256_broadcastd_epi32(a).as_i32x8();
25622 let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
    transmute(simd_select_bitmask(k, broadcast, zero))
25624}
25625
25626/// Broadcast the low packed 32-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25627///
25628/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastd_epi32&expand=540)
25629#[inline]
25630#[target_feature(enable = "avx512f,avx512vl")]
25631#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25632#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
25633pub unsafe fn _mm_mask_broadcastd_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
25634 let broadcast: i32x4 = _mm_broadcastd_epi32(a).as_i32x4();
    transmute(simd_select_bitmask(k, broadcast, src.as_i32x4()))
25636}
25637
25638/// Broadcast the low packed 32-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25639///
25640/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastd_epi32&expand=541)
25641#[inline]
25642#[target_feature(enable = "avx512f,avx512vl")]
25643#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25644#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
25645pub unsafe fn _mm_maskz_broadcastd_epi32(k: __mmask8, a: __m128i) -> __m128i {
25646 let broadcast: i32x4 = _mm_broadcastd_epi32(a).as_i32x4();
25647 let zero: i32x4 = _mm_setzero_si128().as_i32x4();
    transmute(simd_select_bitmask(k, broadcast, zero))
25649}
25650
25651/// Broadcast the low packed 64-bit integer from a to all elements of dst.
25652///
25653/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastq_epi64&expand=560)
25654#[inline]
25655#[target_feature(enable = "avx512f")]
25656#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25657#[cfg_attr(test, assert_instr(vbroadcas))] //should be vpbroadcastq
25658pub unsafe fn _mm512_broadcastq_epi64(a: __m128i) -> __m512i {
25659 simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0])
25660}
25661
25662/// Broadcast the low packed 64-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25663///
25664/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastq_epi64&expand=561)
25665#[inline]
25666#[target_feature(enable = "avx512f")]
25667#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25668#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
25669pub unsafe fn _mm512_mask_broadcastq_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
25670 let broadcast: i64x8 = _mm512_broadcastq_epi64(a).as_i64x8();
    transmute(simd_select_bitmask(k, broadcast, src.as_i64x8()))
25672}
25673
25674/// Broadcast the low packed 64-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25675///
25676/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastq_epi64&expand=562)
25677#[inline]
25678#[target_feature(enable = "avx512f")]
25679#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25680#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
25681pub unsafe fn _mm512_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m512i {
25682 let broadcast: i64x8 = _mm512_broadcastq_epi64(a).as_i64x8();
25683 let zero: i64x8 = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, broadcast, zero))
25685}
25686
25687/// Broadcast the low packed 64-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25688///
25689/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastq_epi64&expand=558)
25690#[inline]
25691#[target_feature(enable = "avx512f,avx512vl")]
25692#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25693#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
25694pub unsafe fn _mm256_mask_broadcastq_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
25695 let broadcast: i64x4 = _mm256_broadcastq_epi64(a).as_i64x4();
    transmute(simd_select_bitmask(k, broadcast, src.as_i64x4()))
25697}
25698
25699/// Broadcast the low packed 64-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25700///
25701/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastq_epi64&expand=559)
25702#[inline]
25703#[target_feature(enable = "avx512f,avx512vl")]
25704#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25705#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
25706pub unsafe fn _mm256_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m256i {
25707 let broadcast: i64x4 = _mm256_broadcastq_epi64(a).as_i64x4();
25708 let zero: i64x4 = _mm256_setzero_si256().as_i64x4();
    transmute(simd_select_bitmask(k, broadcast, zero))
25710}
25711
25712/// Broadcast the low packed 64-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25713///
25714/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastq_epi64&expand=555)
25715#[inline]
25716#[target_feature(enable = "avx512f,avx512vl")]
25717#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25718#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
25719pub unsafe fn _mm_mask_broadcastq_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
25720 let broadcast: i64x2 = _mm_broadcastq_epi64(a).as_i64x2();
    transmute(simd_select_bitmask(k, broadcast, src.as_i64x2()))
25722}
25723
25724/// Broadcast the low packed 64-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25725///
25726/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastq_epi64&expand=556)
25727#[inline]
25728#[target_feature(enable = "avx512f,avx512vl")]
25729#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25730#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
25731pub unsafe fn _mm_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m128i {
25732 let broadcast: i64x2 = _mm_broadcastq_epi64(a).as_i64x2();
25733 let zero: i64x2 = _mm_setzero_si128().as_i64x2();
    transmute(simd_select_bitmask(k, broadcast, zero))
25735}
25736
25737/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst.
25738///
25739/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastss_ps&expand=578)
25740#[inline]
25741#[target_feature(enable = "avx512f")]
25742#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25743#[cfg_attr(test, assert_instr(vbroadcastss))]
25744pub unsafe fn _mm512_broadcastss_ps(a: __m128) -> __m512 {
25745 simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
25746}
25747
25748/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25749///
25750/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastss_ps&expand=579)
25751#[inline]
25752#[target_feature(enable = "avx512f")]
25753#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25754#[cfg_attr(test, assert_instr(vbroadcastss))]
25755pub unsafe fn _mm512_mask_broadcastss_ps(src: __m512, k: __mmask16, a: __m128) -> __m512 {
25756 let broadcast: f32x16 = _mm512_broadcastss_ps(a).as_f32x16();
    transmute(simd_select_bitmask(k, broadcast, src.as_f32x16()))
25758}
25759
25760/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25761///
25762/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastss_ps&expand=580)
25763#[inline]
25764#[target_feature(enable = "avx512f")]
25765#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25766#[cfg_attr(test, assert_instr(vbroadcastss))]
25767pub unsafe fn _mm512_maskz_broadcastss_ps(k: __mmask16, a: __m128) -> __m512 {
25768 let broadcast: f32x16 = _mm512_broadcastss_ps(a).as_f32x16();
25769 let zero: f32x16 = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, broadcast, zero))
25771}
25772
25773/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25774///
25775/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastss_ps&expand=576)
25776#[inline]
25777#[target_feature(enable = "avx512f,avx512vl")]
25778#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25779#[cfg_attr(test, assert_instr(vbroadcastss))]
25780pub unsafe fn _mm256_mask_broadcastss_ps(src: __m256, k: __mmask8, a: __m128) -> __m256 {
25781 let broadcast: f32x8 = _mm256_broadcastss_ps(a).as_f32x8();
    transmute(simd_select_bitmask(k, broadcast, src.as_f32x8()))
25783}
25784
25785/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25786///
25787/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastss_ps&expand=577)
25788#[inline]
25789#[target_feature(enable = "avx512f,avx512vl")]
25790#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25791#[cfg_attr(test, assert_instr(vbroadcastss))]
25792pub unsafe fn _mm256_maskz_broadcastss_ps(k: __mmask8, a: __m128) -> __m256 {
25793 let broadcast: f32x8 = _mm256_broadcastss_ps(a).as_f32x8();
25794 let zero: f32x8 = _mm256_setzero_ps().as_f32x8();
    transmute(simd_select_bitmask(k, broadcast, zero))
25796}
25797
25798/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25799///
25800/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastss_ps&expand=573)
25801#[inline]
25802#[target_feature(enable = "avx512f,avx512vl")]
25803#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25804#[cfg_attr(test, assert_instr(vbroadcastss))]
25805pub unsafe fn _mm_mask_broadcastss_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
25806 let broadcast: f32x4 = _mm_broadcastss_ps(a).as_f32x4();
    transmute(simd_select_bitmask(k, broadcast, src.as_f32x4()))
25808}
25809
25810/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25811///
25812/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastss_ps&expand=574)
25813#[inline]
25814#[target_feature(enable = "avx512f,avx512vl")]
25815#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25816#[cfg_attr(test, assert_instr(vbroadcastss))]
25817pub unsafe fn _mm_maskz_broadcastss_ps(k: __mmask8, a: __m128) -> __m128 {
25818 let broadcast: f32x4 = _mm_broadcastss_ps(a).as_f32x4();
25819 let zero: f32x4 = _mm_setzero_ps().as_f32x4();
    transmute(simd_select_bitmask(k, broadcast, zero))
25821}
25822
25823/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst.
25824///
25825/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastsd_pd&expand=567)
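///
/// A minimal illustrative sketch with assumed values:
///
/// ```ignore
/// let a = _mm_set_pd(9.0, 3.0); // low element is 3.0
/// let r = _mm512_broadcastsd_pd(a);
/// // every lane of r is 3.0
/// ```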
25826#[inline]
25827#[target_feature(enable = "avx512f")]
25828#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25829#[cfg_attr(test, assert_instr(vbroadcastsd))]
25830pub unsafe fn _mm512_broadcastsd_pd(a: __m128d) -> __m512d {
25831 simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0])
25832}
25833
25834/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25835///
25836/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastsd_pd&expand=568)
25837#[inline]
25838#[target_feature(enable = "avx512f")]
25839#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25840#[cfg_attr(test, assert_instr(vbroadcastsd))]
25841pub unsafe fn _mm512_mask_broadcastsd_pd(src: __m512d, k: __mmask8, a: __m128d) -> __m512d {
25842 let broadcast: f64x8 = _mm512_broadcastsd_pd(a).as_f64x8();
    transmute(simd_select_bitmask(k, broadcast, src.as_f64x8()))
25844}
25845
25846/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25847///
25848/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastsd_pd&expand=569)
25849#[inline]
25850#[target_feature(enable = "avx512f")]
25851#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25852#[cfg_attr(test, assert_instr(vbroadcastsd))]
25853pub unsafe fn _mm512_maskz_broadcastsd_pd(k: __mmask8, a: __m128d) -> __m512d {
25854 let broadcast: f64x8 = _mm512_broadcastsd_pd(a).as_f64x8();
25855 let zero: f64x8 = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, broadcast, zero))
25857}
25858
25859/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25860///
25861/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastsd_pd&expand=565)
25862#[inline]
25863#[target_feature(enable = "avx512f,avx512vl")]
25864#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25865#[cfg_attr(test, assert_instr(vbroadcastsd))]
25866pub unsafe fn _mm256_mask_broadcastsd_pd(src: __m256d, k: __mmask8, a: __m128d) -> __m256d {
25867 let broadcast: f64x4 = _mm256_broadcastsd_pd(a).as_f64x4();
    transmute(simd_select_bitmask(k, broadcast, src.as_f64x4()))
25869}
25870
25871/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25872///
25873/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastsd_pd&expand=566)
25874#[inline]
25875#[target_feature(enable = "avx512f,avx512vl")]
25876#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25877#[cfg_attr(test, assert_instr(vbroadcastsd))]
25878pub unsafe fn _mm256_maskz_broadcastsd_pd(k: __mmask8, a: __m128d) -> __m256d {
25879 let broadcast: f64x4 = _mm256_broadcastsd_pd(a).as_f64x4();
25880 let zero: f64x4 = _mm256_setzero_pd().as_f64x4();
    transmute(simd_select_bitmask(k, broadcast, zero))
25882}
25883
25884/// Broadcast the 4 packed 32-bit integers from a to all elements of dst.
25885///
25886/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_i32x4&expand=510)
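///
/// A minimal illustrative sketch with assumed values; the four source lanes are
/// repeated four times to fill the 512-bit result:
///
/// ```ignore
/// let a = _mm_setr_epi32(1, 2, 3, 4);
/// let r = _mm512_broadcast_i32x4(a);
/// // r = [1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4]
/// ```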
25887#[inline]
25888#[target_feature(enable = "avx512f")] //msvc: vbroadcasti32x4, linux: vshuf
25889#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25890pub unsafe fn _mm512_broadcast_i32x4(a: __m128i) -> __m512i {
25891 let a: i32x4 = a.as_i32x4();
25892 let ret: i32x16 = simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]);
    transmute(ret)
25894}
25895
25896/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25897///
25898/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_i32x4&expand=511)
25899#[inline]
25900#[target_feature(enable = "avx512f")] //msvc: vbroadcasti32x4, linux: vshuf
25901#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25902pub unsafe fn _mm512_mask_broadcast_i32x4(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
25903 let broadcast: i32x16 = _mm512_broadcast_i32x4(a).as_i32x16();
    transmute(simd_select_bitmask(k, broadcast, src.as_i32x16()))
25905}
25906
25907/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25908///
25909/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_i32x4&expand=512)
25910#[inline]
25911#[target_feature(enable = "avx512f")] //msvc: vbroadcasti32x4, linux: vshuf
25912#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25913pub unsafe fn _mm512_maskz_broadcast_i32x4(k: __mmask16, a: __m128i) -> __m512i {
25914 let broadcast: i32x16 = _mm512_broadcast_i32x4(a).as_i32x16();
25915 let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, broadcast, zero))
25917}
25918
25919/// Broadcast the 4 packed 32-bit integers from a to all elements of dst.
25920///
25921/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_i32x4&expand=507)
25922#[inline]
25923#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcasti32x4, linux: vshuf
25924#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25925pub unsafe fn _mm256_broadcast_i32x4(a: __m128i) -> __m256i {
25926 let a: i32x4 = a.as_i32x4();
25927 let ret: i32x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]);
    transmute(ret)
25929}
25930
25931/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25932///
25933/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_i32x4&expand=508)
25934#[inline]
25935#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcasti32x4, linux: vshuf
25936#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25937pub unsafe fn _mm256_mask_broadcast_i32x4(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
25938 let broadcast: i32x8 = _mm256_broadcast_i32x4(a).as_i32x8();
    transmute(simd_select_bitmask(k, broadcast, src.as_i32x8()))
25940}
25941
25942/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25943///
25944/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_i32x4&expand=509)
25945#[inline]
25946#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcasti32x4, linux: vshuf
25947#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25948pub unsafe fn _mm256_maskz_broadcast_i32x4(k: __mmask8, a: __m128i) -> __m256i {
25949 let broadcast: i32x8 = _mm256_broadcast_i32x4(a).as_i32x8();
25950 let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
    transmute(simd_select_bitmask(k, broadcast, zero))
25952}
25953
25954/// Broadcast the 4 packed 64-bit integers from a to all elements of dst.
25955///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_i64x4&expand=522)
25957#[inline]
25958#[target_feature(enable = "avx512f")] //msvc: vbroadcasti64x4, linux: vperm
25959#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25960pub unsafe fn _mm512_broadcast_i64x4(a: __m256i) -> __m512i {
25961 simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3])
25962}
25963
25964/// Broadcast the 4 packed 64-bit integers from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25965///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_i64x4&expand=523)
25967#[inline]
25968#[target_feature(enable = "avx512f")] //msvc: vbroadcasti64x4, linux: vperm
25969#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25970pub unsafe fn _mm512_mask_broadcast_i64x4(src: __m512i, k: __mmask8, a: __m256i) -> __m512i {
25971 let broadcast: i64x8 = _mm512_broadcast_i64x4(a).as_i64x8();
    transmute(simd_select_bitmask(k, broadcast, src.as_i64x8()))
25973}
25974
25975/// Broadcast the 4 packed 64-bit integers from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25976///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_i64x4&expand=524)
25978#[inline]
25979#[target_feature(enable = "avx512f")] //msvc: vbroadcasti64x4, linux: vperm
25980#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25981pub unsafe fn _mm512_maskz_broadcast_i64x4(k: __mmask8, a: __m256i) -> __m512i {
25982 let broadcast: i64x8 = _mm512_broadcast_i64x4(a).as_i64x8();
25983 let zero: i64x8 = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, broadcast, zero))
25985}
25986
25987/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst.
25988///
25989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_f32x4&expand=483)
25990#[inline]
25991#[target_feature(enable = "avx512f")] //msvc: vbroadcastf32x4, linux: vshuf
25992#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25993pub unsafe fn _mm512_broadcast_f32x4(a: __m128) -> __m512 {
25994 simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3])
25995}
25996
25997/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25998///
25999/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_f32x4&expand=484)
26000#[inline]
#[target_feature(enable = "avx512f")] //msvc: vbroadcastf32x4, linux: vshuf
26002#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26003pub unsafe fn _mm512_mask_broadcast_f32x4(src: __m512, k: __mmask16, a: __m128) -> __m512 {
26004 let broadcast: f32x16 = _mm512_broadcast_f32x4(a).as_f32x16();
    transmute(simd_select_bitmask(k, broadcast, src.as_f32x16()))
26006}
26007
26008/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26009///
26010/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_f32x4&expand=485)
26011#[inline]
#[target_feature(enable = "avx512f")] //msvc: vbroadcastf32x4, linux: vshuf
26013#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26014pub unsafe fn _mm512_maskz_broadcast_f32x4(k: __mmask16, a: __m128) -> __m512 {
26015 let broadcast: f32x16 = _mm512_broadcast_f32x4(a).as_f32x16();
26016 let zero: f32x16 = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, broadcast, zero))
26018}
26019
26020/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst.
26021///
26022/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_f32x4&expand=480)
26023#[inline]
26024#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcastf32x4, linux: vshuf
26025#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26026pub unsafe fn _mm256_broadcast_f32x4(a: __m128) -> __m256 {
26027 simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3])
26028}
26029
26030/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26031///
26032/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_f32x4&expand=481)
26033#[inline]
#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcastf32x4, linux: vshuf
26035#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26036pub unsafe fn _mm256_mask_broadcast_f32x4(src: __m256, k: __mmask8, a: __m128) -> __m256 {
26037 let broadcast: f32x8 = _mm256_broadcast_f32x4(a).as_f32x8();
    transmute(simd_select_bitmask(k, broadcast, src.as_f32x8()))
26039}
26040
26041/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26042///
26043/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_f32x4&expand=482)
26044#[inline]
#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcastf32x4, linux: vshuf
26046#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26047pub unsafe fn _mm256_maskz_broadcast_f32x4(k: __mmask8, a: __m128) -> __m256 {
26048 let broadcast: f32x8 = _mm256_broadcast_f32x4(a).as_f32x8();
26049 let zero: f32x8 = _mm256_setzero_ps().as_f32x8();
    transmute(simd_select_bitmask(k, broadcast, zero))
26051}
26052
26053/// Broadcast the 4 packed double-precision (64-bit) floating-point elements from a to all elements of dst.
26054///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_f64x4&expand=495)
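///
/// A minimal illustrative sketch with assumed values; the four source lanes fill
/// both 256-bit halves of the result:
///
/// ```ignore
/// let a = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);
/// let r = _mm512_broadcast_f64x4(a);
/// // r = [1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0]
/// ```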
26056#[inline]
26057#[target_feature(enable = "avx512f")] //msvc: vbroadcastf64x4, linux: vperm
26058#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26059pub unsafe fn _mm512_broadcast_f64x4(a: __m256d) -> __m512d {
26060 simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3])
26061}
26062
26063/// Broadcast the 4 packed double-precision (64-bit) floating-point elements from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26064///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_f64x4&expand=496)
26066#[inline]
#[target_feature(enable = "avx512f")] //msvc: vbroadcastf64x4, linux: vperm
26068#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26069pub unsafe fn _mm512_mask_broadcast_f64x4(src: __m512d, k: __mmask8, a: __m256d) -> __m512d {
26070 let broadcast: f64x8 = _mm512_broadcast_f64x4(a).as_f64x8();
    transmute(simd_select_bitmask(k, broadcast, src.as_f64x8()))
26072}
26073
26074/// Broadcast the 4 packed double-precision (64-bit) floating-point elements from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26075///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_f64x4&expand=497)
26077#[inline]
#[target_feature(enable = "avx512f")] //msvc: vbroadcastf64x4, linux: vperm
26079#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26080pub unsafe fn _mm512_maskz_broadcast_f64x4(k: __mmask8, a: __m256d) -> __m512d {
26081 let broadcast: f64x8 = _mm512_broadcast_f64x4(a).as_f64x8();
26082 let zero: f64x8 = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, broadcast, zero))
26084}
26085
26086/// Blend packed 32-bit integers from a and b using control mask k, and store the results in dst.
26087///
26088/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_epi32&expand=435)
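///
/// A minimal illustrative sketch with assumed values; bit `i` of `k` picks lane `i`
/// of `b` when set and lane `i` of `a` when clear:
///
/// ```ignore
/// let a = _mm512_set1_epi32(0);
/// let b = _mm512_set1_epi32(-1);
/// let r = _mm512_mask_blend_epi32(0b1111_1111_0000_0000, a, b);
/// // lanes 0..8 of r come from a (0), lanes 8..16 come from b (-1)
/// ```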
26089#[inline]
26090#[target_feature(enable = "avx512f")]
26091#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26092#[cfg_attr(test, assert_instr(vmovdqa32))] //should be vpblendmd
26093pub unsafe fn _mm512_mask_blend_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_select_bitmask(k, b.as_i32x16(), a.as_i32x16()))
26095}
26096
26097/// Blend packed 32-bit integers from a and b using control mask k, and store the results in dst.
26098///
26099/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_epi32&expand=434)
26100#[inline]
26101#[target_feature(enable = "avx512f,avx512vl")]
26102#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26103#[cfg_attr(test, assert_instr(vmovdqa32))] //should be vpblendmd
26104pub unsafe fn _mm256_mask_blend_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_select_bitmask(k, b.as_i32x8(), a.as_i32x8()))
26106}
26107
26108/// Blend packed 32-bit integers from a and b using control mask k, and store the results in dst.
26109///
26110/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_epi32&expand=432)
26111#[inline]
26112#[target_feature(enable = "avx512f,avx512vl")]
26113#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26114#[cfg_attr(test, assert_instr(vmovdqa32))] //should be vpblendmd
26115pub unsafe fn _mm_mask_blend_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_select_bitmask(k, b.as_i32x4(), a.as_i32x4()))
26117}
26118
26119/// Blend packed 64-bit integers from a and b using control mask k, and store the results in dst.
26120///
26121/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_epi64&expand=438)
26122#[inline]
26123#[target_feature(enable = "avx512f")]
26124#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26125#[cfg_attr(test, assert_instr(vmovdqa64))] //should be vpblendmq
26126pub unsafe fn _mm512_mask_blend_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_select_bitmask(k, b.as_i64x8(), a.as_i64x8()))
26128}
26129
26130/// Blend packed 64-bit integers from a and b using control mask k, and store the results in dst.
26131///
26132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_epi64&expand=437)
26133#[inline]
26134#[target_feature(enable = "avx512f,avx512vl")]
26135#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26136#[cfg_attr(test, assert_instr(vmovdqa64))] //should be vpblendmq
26137pub unsafe fn _mm256_mask_blend_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_select_bitmask(k, b.as_i64x4(), a.as_i64x4()))
26139}
26140
26141/// Blend packed 64-bit integers from a and b using control mask k, and store the results in dst.
26142///
26143/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_epi64&expand=436)
26144#[inline]
26145#[target_feature(enable = "avx512f,avx512vl")]
26146#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26147#[cfg_attr(test, assert_instr(vmovdqa64))] //should be vpblendmq
26148pub unsafe fn _mm_mask_blend_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_select_bitmask(k, b.as_i64x2(), a.as_i64x2()))
26150}
26151
26152/// Blend packed single-precision (32-bit) floating-point elements from a and b using control mask k, and store the results in dst.
26153///
26154/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_ps&expand=451)
26155#[inline]
26156#[target_feature(enable = "avx512f")]
26157#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26158#[cfg_attr(test, assert_instr(vmovaps))] //should be vpblendmps
26159pub unsafe fn _mm512_mask_blend_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    transmute(simd_select_bitmask(k, b.as_f32x16(), a.as_f32x16()))
26161}
26162
26163/// Blend packed single-precision (32-bit) floating-point elements from a and b using control mask k, and store the results in dst.
26164///
26165/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_ps&expand=450)
26166#[inline]
26167#[target_feature(enable = "avx512f,avx512vl")]
26168#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26169#[cfg_attr(test, assert_instr(vmovaps))] //should be vpblendmps
26170pub unsafe fn _mm256_mask_blend_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    transmute(simd_select_bitmask(k, b.as_f32x8(), a.as_f32x8()))
26172}
26173
26174/// Blend packed single-precision (32-bit) floating-point elements from a and b using control mask k, and store the results in dst.
26175///
26176/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_ps&expand=448)
26177#[inline]
26178#[target_feature(enable = "avx512f,avx512vl")]
26179#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26180#[cfg_attr(test, assert_instr(vmovaps))] //should be vpblendmps
26181pub unsafe fn _mm_mask_blend_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    transmute(simd_select_bitmask(k, b.as_f32x4(), a.as_f32x4()))
26183}
26184
26185/// Blend packed double-precision (64-bit) floating-point elements from a and b using control mask k, and store the results in dst.
26186///
26187/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_pd&expand=446)
26188#[inline]
26189#[target_feature(enable = "avx512f")]
26190#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26191#[cfg_attr(test, assert_instr(vmovapd))] //should be vpblendmpd
26192pub unsafe fn _mm512_mask_blend_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    transmute(simd_select_bitmask(k, b.as_f64x8(), a.as_f64x8()))
26194}
26195
26196/// Blend packed double-precision (64-bit) floating-point elements from a and b using control mask k, and store the results in dst.
26197///
26198/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_pd&expand=445)
26199#[inline]
26200#[target_feature(enable = "avx512f,avx512vl")]
26201#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26202#[cfg_attr(test, assert_instr(vmovapd))] //should be vpblendmpd
26203pub unsafe fn _mm256_mask_blend_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    transmute(simd_select_bitmask(k, b.as_f64x4(), a.as_f64x4()))
26205}
26206
26207/// Blend packed double-precision (64-bit) floating-point elements from a and b using control mask k, and store the results in dst.
26208///
26209/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_pd&expand=443)
26210#[inline]
26211#[target_feature(enable = "avx512f,avx512vl")]
26212#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26213#[cfg_attr(test, assert_instr(vmovapd))] //should be vpblendmpd
26214pub unsafe fn _mm_mask_blend_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    transmute(simd_select_bitmask(k, b.as_f64x2(), a.as_f64x2()))
26216}
26217
26218/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 64 bytes (16 elements) in dst.
26219///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_alignr_epi32&expand=245)
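///
/// A minimal illustrative sketch with assumed values; with `IMM8 = 1` the result is
/// `b` shifted down by one 32-bit lane, with the vacated top lane filled from `a`:
///
/// ```ignore
/// let a = _mm512_set1_epi32(100);
/// let b = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
/// let r = _mm512_alignr_epi32::<1>(a, b);
/// // r = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 100]
/// ```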
26221#[inline]
26222#[target_feature(enable = "avx512f")]
26223#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26224#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
26225#[rustc_legacy_const_generics(2)]
26226pub unsafe fn _mm512_alignr_epi32<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
26227 static_assert_uimm_bits!(IMM8, 8);
26228 let a = a.as_i32x16();
26229 let b = b.as_i32x16();
26230 let imm8: i32 = IMM8 % 16;
26231 let r: i32x16 = match imm8 {
26232 0 => simd_shuffle!(
26233 a,
26234 b,
26235 [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,],
26236 ),
26237 1 => simd_shuffle!(
26238 a,
26239 b,
26240 [17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0,],
26241 ),
26242 2 => simd_shuffle!(
26243 a,
26244 b,
26245 [18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1],
26246 ),
26247 3 => simd_shuffle!(
26248 a,
26249 b,
26250 [19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2],
26251 ),
26252 4 => simd_shuffle!(
26253 a,
26254 b,
26255 [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3],
26256 ),
26257 5 => simd_shuffle!(
26258 a,
26259 b,
26260 [21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4],
26261 ),
26262 6 => simd_shuffle!(
26263 a,
26264 b,
26265 [22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5],
26266 ),
26267 7 => simd_shuffle!(
26268 a,
26269 b,
26270 [23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6],
26271 ),
26272 8 => simd_shuffle!(
26273 a,
26274 b,
26275 [24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7],
26276 ),
26277 9 => simd_shuffle!(
26278 a,
26279 b,
26280 [25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8],
26281 ),
26282 10 => simd_shuffle!(a, b, [26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
26283 11 => simd_shuffle!(a, b, [27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]),
26284 12 => simd_shuffle!(a, b, [28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]),
26285 13 => simd_shuffle!(a, b, [29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]),
26286 14 => simd_shuffle!(a, b, [30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]),
26287 _ => simd_shuffle!(a, b, [31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]),
26288 };
26289 transmute(r)
26290}
26291
26292/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 64 bytes (16 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26293///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_alignr_epi32&expand=246)
26295#[inline]
26296#[target_feature(enable = "avx512f")]
26297#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26298#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
26299#[rustc_legacy_const_generics(4)]
26300pub unsafe fn _mm512_mask_alignr_epi32<const IMM8: i32>(
26301 src: __m512i,
26302 k: __mmask16,
26303 a: __m512i,
26304 b: __m512i,
26305) -> __m512i {
26306 static_assert_uimm_bits!(IMM8, 8);
26307 let r: __m512i = _mm512_alignr_epi32::<IMM8>(a, b);
    transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16()))
26309}
26310
/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 64 bytes (16 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26312///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_alignr_epi32&expand=247)
26314#[inline]
26315#[target_feature(enable = "avx512f")]
26316#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26317#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
26318#[rustc_legacy_const_generics(3)]
26319pub unsafe fn _mm512_maskz_alignr_epi32<const IMM8: i32>(
26320 k: __mmask16,
26321 a: __m512i,
26322 b: __m512i,
26323) -> __m512i {
26324 static_assert_uimm_bits!(IMM8, 8);
26325 let r: __m512i = _mm512_alignr_epi32::<IMM8>(a, b);
26326 let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, r.as_i32x16(), zero))
26328}
26329
26330/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 32 bytes (8 elements) in dst.
26331///
26332/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_alignr_epi32&expand=242)
26333#[inline]
26334#[target_feature(enable = "avx512f,avx512vl")]
26335#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26336#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
26337#[rustc_legacy_const_generics(2)]
26338pub unsafe fn _mm256_alignr_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
26339 static_assert_uimm_bits!(IMM8, 8);
26340 let a = a.as_i32x8();
26341 let b = b.as_i32x8();
26342 let imm8: i32 = IMM8 % 16;
26343 let r: i32x8 = match imm8 {
26344 0 => simd_shuffle!(a, b, [8, 9, 10, 11, 12, 13, 14, 15]),
26345 1 => simd_shuffle!(a, b, [9, 10, 11, 12, 13, 14, 15, 0]),
26346 2 => simd_shuffle!(a, b, [10, 11, 12, 13, 14, 15, 0, 1]),
26347 3 => simd_shuffle!(a, b, [11, 12, 13, 14, 15, 0, 1, 2]),
26348 4 => simd_shuffle!(a, b, [12, 13, 14, 15, 0, 1, 2, 3]),
26349 5 => simd_shuffle!(a, b, [13, 14, 15, 0, 1, 2, 3, 4]),
26350 6 => simd_shuffle!(a, b, [14, 15, 0, 1, 2, 3, 4, 5]),
26351 7 => simd_shuffle!(a, b, [15, 0, 1, 2, 3, 4, 5, 6]),
26352 8 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]),
26353 9 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]),
26354 10 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]),
26355 11 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]),
26356 12 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]),
26357 13 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]),
26358 14 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]),
26359 _ => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]),
26360 };
26361 transmute(r)
26362}
26363
26364/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 32 bytes (8 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26365///
26366/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_alignr_epi32&expand=243)
26367#[inline]
26368#[target_feature(enable = "avx512f,avx512vl")]
26369#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26370#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
26371#[rustc_legacy_const_generics(4)]
26372pub unsafe fn _mm256_mask_alignr_epi32<const IMM8: i32>(
26373 src: __m256i,
26374 k: __mmask8,
26375 a: __m256i,
26376 b: __m256i,
26377) -> __m256i {
26378 static_assert_uimm_bits!(IMM8, 8);
26379 let r: __m256i = _mm256_alignr_epi32::<IMM8>(a, b);
    transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8()))
26381}
26382
26383/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 32 bytes (8 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26384///
26385/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_alignr_epi32&expand=244)
26386#[inline]
26387#[target_feature(enable = "avx512f,avx512vl")]
26388#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26389#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
26390#[rustc_legacy_const_generics(3)]
26391pub unsafe fn _mm256_maskz_alignr_epi32<const IMM8: i32>(
26392 k: __mmask8,
26393 a: __m256i,
26394 b: __m256i,
26395) -> __m256i {
26396 static_assert_uimm_bits!(IMM8, 8);
26397 let r: __m256i = _mm256_alignr_epi32::<IMM8>(a, b);
26398 let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
    transmute(simd_select_bitmask(k, r.as_i32x8(), zero))
26400}
26401
26402/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 16 bytes (4 elements) in dst.
26403///
26404/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_alignr_epi32&expand=239)
26405#[inline]
26406#[target_feature(enable = "avx512f,avx512vl")]
26407#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26408#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))] //should be valignd
26409#[rustc_legacy_const_generics(2)]
26410pub unsafe fn _mm_alignr_epi32<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
26411 static_assert_uimm_bits!(IMM8, 8);
26412 let a: i32x4 = a.as_i32x4();
26413 let b: i32x4 = b.as_i32x4();
26414 let imm8: i32 = IMM8 % 8;
26415 let r: i32x4 = match imm8 {
26416 0 => simd_shuffle!(a, b, [4, 5, 6, 7]),
26417 1 => simd_shuffle!(a, b, [5, 6, 7, 0]),
26418 2 => simd_shuffle!(a, b, [6, 7, 0, 1]),
26419 3 => simd_shuffle!(a, b, [7, 0, 1, 2]),
26420 4 => simd_shuffle!(a, b, [0, 1, 2, 3]),
26421 5 => simd_shuffle!(a, b, [1, 2, 3, 0]),
26422 6 => simd_shuffle!(a, b, [2, 3, 0, 1]),
26423 _ => simd_shuffle!(a, b, [3, 0, 1, 2]),
26424 };
    transmute(r)
26426}
26427
26428/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 16 bytes (4 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26429///
26430/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_alignr_epi32&expand=240)
26431#[inline]
26432#[target_feature(enable = "avx512f,avx512vl")]
26433#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26434#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
26435#[rustc_legacy_const_generics(4)]
26436pub unsafe fn _mm_mask_alignr_epi32<const IMM8: i32>(
26437 src: __m128i,
26438 k: __mmask8,
26439 a: __m128i,
26440 b: __m128i,
26441) -> __m128i {
26442 static_assert_uimm_bits!(IMM8, 8);
26443 let r: __m128i = _mm_alignr_epi32::<IMM8>(a, b);
    transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4()))
26445}
26446
26447/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 16 bytes (4 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26448///
26449/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_alignr_epi32&expand=241)
26450#[inline]
26451#[target_feature(enable = "avx512f,avx512vl")]
26452#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26453#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
26454#[rustc_legacy_const_generics(3)]
26455pub unsafe fn _mm_maskz_alignr_epi32<const IMM8: i32>(
26456 k: __mmask8,
26457 a: __m128i,
26458 b: __m128i,
26459) -> __m128i {
26460 static_assert_uimm_bits!(IMM8, 8);
26461 let r: __m128i = _mm_alignr_epi32::<IMM8>(a, b);
26462 let zero: i32x4 = _mm_setzero_si128().as_i32x4();
    transmute(simd_select_bitmask(k, r.as_i32x4(), zero))
26464}
26465
26466/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 64 bytes (8 elements) in dst.
26467///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_alignr_epi64&expand=254)
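///
/// A minimal illustrative sketch with assumed values; with `IMM8 = 1` the result is
/// `b` shifted down by one 64-bit lane, with the top lane taken from `a`:
///
/// ```ignore
/// let a = _mm512_set1_epi64(100);
/// let b = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
/// let r = _mm512_alignr_epi64::<1>(a, b);
/// // r = [1, 2, 3, 4, 5, 6, 7, 100]
/// ```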
26469#[inline]
26470#[target_feature(enable = "avx512f")]
26471#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26472#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
26473#[rustc_legacy_const_generics(2)]
26474pub unsafe fn _mm512_alignr_epi64<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
26475 static_assert_uimm_bits!(IMM8, 8);
26476 let imm8: i32 = IMM8 % 8;
26477 let r: i64x8 = match imm8 {
26478 0 => simd_shuffle!(a, b, [8, 9, 10, 11, 12, 13, 14, 15]),
26479 1 => simd_shuffle!(a, b, [9, 10, 11, 12, 13, 14, 15, 0]),
26480 2 => simd_shuffle!(a, b, [10, 11, 12, 13, 14, 15, 0, 1]),
26481 3 => simd_shuffle!(a, b, [11, 12, 13, 14, 15, 0, 1, 2]),
26482 4 => simd_shuffle!(a, b, [12, 13, 14, 15, 0, 1, 2, 3]),
26483 5 => simd_shuffle!(a, b, [13, 14, 15, 0, 1, 2, 3, 4]),
26484 6 => simd_shuffle!(a, b, [14, 15, 0, 1, 2, 3, 4, 5]),
26485 _ => simd_shuffle!(a, b, [15, 0, 1, 2, 3, 4, 5, 6]),
26486 };
    transmute(r)
26488}
26489
26490/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 64 bytes (8 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26491///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_alignr_epi64&expand=255)
26493#[inline]
26494#[target_feature(enable = "avx512f")]
26495#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26496#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
26497#[rustc_legacy_const_generics(4)]
26498pub unsafe fn _mm512_mask_alignr_epi64<const IMM8: i32>(
26499 src: __m512i,
26500 k: __mmask8,
26501 a: __m512i,
26502 b: __m512i,
26503) -> __m512i {
26504 static_assert_uimm_bits!(IMM8, 8);
26505 let r: __m512i = _mm512_alignr_epi64::<IMM8>(a, b);
    transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8()))
26507}
26508
/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 64 bytes (8 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26510///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_alignr_epi64&expand=256)
26512#[inline]
26513#[target_feature(enable = "avx512f")]
26514#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26515#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
26516#[rustc_legacy_const_generics(3)]
26517pub unsafe fn _mm512_maskz_alignr_epi64<const IMM8: i32>(
26518 k: __mmask8,
26519 a: __m512i,
26520 b: __m512i,
26521) -> __m512i {
26522 static_assert_uimm_bits!(IMM8, 8);
26523 let r: __m512i = _mm512_alignr_epi64::<IMM8>(a, b);
26524 let zero: i64x8 = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, r.as_i64x8(), zero))
26526}
26527
26528/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 32 bytes (4 elements) in dst.
26529///
26530/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_alignr_epi64&expand=251)
26531#[inline]
26532#[target_feature(enable = "avx512f,avx512vl")]
26533#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26534#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
26535#[rustc_legacy_const_generics(2)]
26536pub unsafe fn _mm256_alignr_epi64<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
26537 static_assert_uimm_bits!(IMM8, 8);
26538 let imm8: i32 = IMM8 % 8;
26539 let r: i64x4 = match imm8 {
26540 0 => simd_shuffle!(a, b, [4, 5, 6, 7]),
26541 1 => simd_shuffle!(a, b, [5, 6, 7, 0]),
26542 2 => simd_shuffle!(a, b, [6, 7, 0, 1]),
26543 3 => simd_shuffle!(a, b, [7, 0, 1, 2]),
26544 4 => simd_shuffle!(a, b, [0, 1, 2, 3]),
26545 5 => simd_shuffle!(a, b, [1, 2, 3, 4]),
26546 6 => simd_shuffle!(a, b, [2, 3, 4, 5]),
26547 _ => simd_shuffle!(a, b, [3, 4, 5, 6]),
26548 };
    transmute(r)
26550}
26551
26552/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 32 bytes (4 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26553///
26554/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_alignr_epi64&expand=252)
26555#[inline]
26556#[target_feature(enable = "avx512f,avx512vl")]
26557#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26558#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
26559#[rustc_legacy_const_generics(4)]
26560pub unsafe fn _mm256_mask_alignr_epi64<const IMM8: i32>(
26561 src: __m256i,
26562 k: __mmask8,
26563 a: __m256i,
26564 b: __m256i,
26565) -> __m256i {
26566 static_assert_uimm_bits!(IMM8, 8);
26567 let r: __m256i = _mm256_alignr_epi64::<IMM8>(a, b);
    transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4()))
26569}
26570
26571/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 32 bytes (4 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26572///
26573/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_alignr_epi64&expand=253)
26574#[inline]
26575#[target_feature(enable = "avx512f,avx512vl")]
26576#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26577#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
26578#[rustc_legacy_const_generics(3)]
26579pub unsafe fn _mm256_maskz_alignr_epi64<const IMM8: i32>(
26580 k: __mmask8,
26581 a: __m256i,
26582 b: __m256i,
26583) -> __m256i {
26584 static_assert_uimm_bits!(IMM8, 8);
26585 let r: __m256i = _mm256_alignr_epi64::<IMM8>(a, b);
26586 let zero: i64x4 = _mm256_setzero_si256().as_i64x4();
    transmute(simd_select_bitmask(k, r.as_i64x4(), zero))
26588}
26589
26590/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 16 bytes (2 elements) in dst.
26591///
26592/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_alignr_epi64&expand=248)
26593#[inline]
26594#[target_feature(enable = "avx512f,avx512vl")]
26595#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26596#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))] //should be valignq
26597#[rustc_legacy_const_generics(2)]
26598pub unsafe fn _mm_alignr_epi64<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
26599 static_assert_uimm_bits!(IMM8, 8);
26600 let imm8: i32 = IMM8 % 4;
26601 let r: i64x2 = match imm8 {
26602 0 => simd_shuffle!(a, b, [2, 3]),
26603 1 => simd_shuffle!(a, b, [3, 0]),
26604 2 => simd_shuffle!(a, b, [0, 1]),
26605 _ => simd_shuffle!(a, b, [1, 2]),
26606 };
    transmute(r)
26608}
26609
26610/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 16 bytes (2 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26611///
26612/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_alignr_epi64&expand=249)
26613#[inline]
26614#[target_feature(enable = "avx512f,avx512vl")]
26615#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26616#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
26617#[rustc_legacy_const_generics(4)]
26618pub unsafe fn _mm_mask_alignr_epi64<const IMM8: i32>(
26619 src: __m128i,
26620 k: __mmask8,
26621 a: __m128i,
26622 b: __m128i,
26623) -> __m128i {
26624 static_assert_uimm_bits!(IMM8, 8);
26625 let r: __m128i = _mm_alignr_epi64::<IMM8>(a, b);
    transmute(simd_select_bitmask(k, r.as_i64x2(), src.as_i64x2()))
26627}
26628
26629/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 16 bytes (2 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26630///
26631/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_alignr_epi64&expand=250)
26632#[inline]
26633#[target_feature(enable = "avx512f,avx512vl")]
26634#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26635#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
26636#[rustc_legacy_const_generics(3)]
26637pub unsafe fn _mm_maskz_alignr_epi64<const IMM8: i32>(
26638 k: __mmask8,
26639 a: __m128i,
26640 b: __m128i,
26641) -> __m128i {
26642 static_assert_uimm_bits!(IMM8, 8);
26643 let r: __m128i = _mm_alignr_epi64::<IMM8>(a, b);
26644 let zero: i64x2 = _mm_setzero_si128().as_i64x2();
    transmute(simd_select_bitmask(k, r.as_i64x2(), zero))
26646}
26647
26648/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst.
26649///
26650/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_epi32&expand=272)
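///
/// A minimal illustrative sketch with assumed values:
///
/// ```ignore
/// let a = _mm512_set1_epi32(0b1100);
/// let b = _mm512_set1_epi32(0b1010);
/// let r = _mm512_and_epi32(a, b);
/// // every lane of r is 0b1000
/// ```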
26651#[inline]
26652#[target_feature(enable = "avx512f")]
26653#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpandq))] //should be vpandd, but generates vpandq
26655pub unsafe fn _mm512_and_epi32(a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_and(a.as_i32x16(), b.as_i32x16()))
26657}
26658
26659/// Performs element-by-element bitwise AND between packed 32-bit integer elements of a and b, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26660///
26661/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_and_epi32&expand=273)
26662#[inline]
26663#[target_feature(enable = "avx512f")]
26664#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26665#[cfg_attr(test, assert_instr(vpandd))]
26666pub unsafe fn _mm512_mask_and_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
26667 let and: i32x16 = _mm512_and_epi32(a, b).as_i32x16();
    transmute(simd_select_bitmask(k, and, src.as_i32x16()))
26669}
26670
26671/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26672///
26673/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_and_epi32&expand=274)
26674#[inline]
26675#[target_feature(enable = "avx512f")]
26676#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26677#[cfg_attr(test, assert_instr(vpandd))]
26678pub unsafe fn _mm512_maskz_and_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
26679 let and: i32x16 = _mm512_and_epi32(a, b).as_i32x16();
26680 let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, and, zero))
26682}
26683
26684/// Performs element-by-element bitwise AND between packed 32-bit integer elements of a and b, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26685///
26686/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_and_epi32&expand=270)
26687#[inline]
26688#[target_feature(enable = "avx512f,avx512vl")]
26689#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26690#[cfg_attr(test, assert_instr(vpandd))]
26691pub unsafe fn _mm256_mask_and_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    let and: i32x8 = simd_and(a.as_i32x8(), b.as_i32x8());
    transmute(simd_select_bitmask(k, and, src.as_i32x8()))
26694}
26695
26696/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26697///
26698/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_and_epi32&expand=271)
26699#[inline]
26700#[target_feature(enable = "avx512f,avx512vl")]
26701#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26702#[cfg_attr(test, assert_instr(vpandd))]
26703pub unsafe fn _mm256_maskz_and_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    let and: i32x8 = simd_and(a.as_i32x8(), b.as_i32x8());
    let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
    transmute(simd_select_bitmask(k, and, zero))
26707}
26708
26709/// Performs element-by-element bitwise AND between packed 32-bit integer elements of a and b, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26710///
26711/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_and_epi32&expand=268)
26712#[inline]
26713#[target_feature(enable = "avx512f,avx512vl")]
26714#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26715#[cfg_attr(test, assert_instr(vpandd))]
26716pub unsafe fn _mm_mask_and_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let and: i32x4 = simd_and(a.as_i32x4(), b.as_i32x4());
    transmute(simd_select_bitmask(k, and, src.as_i32x4()))
26719}
26720
26721/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26722///
26723/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_and_epi32&expand=269)
26724#[inline]
26725#[target_feature(enable = "avx512f,avx512vl")]
26726#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26727#[cfg_attr(test, assert_instr(vpandd))]
26728pub unsafe fn _mm_maskz_and_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let and: i32x4 = simd_and(a.as_i32x4(), b.as_i32x4());
    let zero: i32x4 = _mm_setzero_si128().as_i32x4();
    transmute(simd_select_bitmask(k, and, zero))
26732}
26733
26734/// Compute the bitwise AND of 512 bits (composed of packed 64-bit integers) in a and b, and store the results in dst.
26735///
26736/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_epi64&expand=279)
26737#[inline]
26738#[target_feature(enable = "avx512f")]
26739#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26740#[cfg_attr(test, assert_instr(vpandq))]
26741pub unsafe fn _mm512_and_epi64(a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_and(a.as_i64x8(), b.as_i64x8()))
26743}
26744
26745/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26746///
26747/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_and_epi64&expand=280)
26748#[inline]
26749#[target_feature(enable = "avx512f")]
26750#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26751#[cfg_attr(test, assert_instr(vpandq))]
26752pub unsafe fn _mm512_mask_and_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
26753 let and: i64x8 = _mm512_and_epi64(a, b).as_i64x8();
    transmute(simd_select_bitmask(k, and, src.as_i64x8()))
26755}
26756
26757/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26758///
26759/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_and_epi64&expand=281)
26760#[inline]
26761#[target_feature(enable = "avx512f")]
26762#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26763#[cfg_attr(test, assert_instr(vpandq))]
26764pub unsafe fn _mm512_maskz_and_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
26765 let and: i64x8 = _mm512_and_epi64(a, b).as_i64x8();
26766 let zero: i64x8 = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, and, zero))
26768}
26769
26770/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26771///
26772/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_and_epi64&expand=277)
26773#[inline]
26774#[target_feature(enable = "avx512f,avx512vl")]
26775#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26776#[cfg_attr(test, assert_instr(vpandq))]
26777pub unsafe fn _mm256_mask_and_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    let and: i64x4 = simd_and(a.as_i64x4(), b.as_i64x4());
    transmute(simd_select_bitmask(k, and, src.as_i64x4()))
26780}
26781
26782/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26783///
26784/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_and_epi64&expand=278)
26785#[inline]
26786#[target_feature(enable = "avx512f,avx512vl")]
26787#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26788#[cfg_attr(test, assert_instr(vpandq))]
26789pub unsafe fn _mm256_maskz_and_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    let and: i64x4 = simd_and(a.as_i64x4(), b.as_i64x4());
    let zero: i64x4 = _mm256_setzero_si256().as_i64x4();
    transmute(simd_select_bitmask(k, and, zero))
26793}
26794
26795/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26796///
26797/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_and_epi64&expand=275)
26798#[inline]
26799#[target_feature(enable = "avx512f,avx512vl")]
26800#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26801#[cfg_attr(test, assert_instr(vpandq))]
26802pub unsafe fn _mm_mask_and_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let and: i64x2 = simd_and(a.as_i64x2(), b.as_i64x2());
    transmute(simd_select_bitmask(k, and, src.as_i64x2()))
26805}
26806
26807/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26808///
26809/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_and_epi64&expand=276)
26810#[inline]
26811#[target_feature(enable = "avx512f,avx512vl")]
26812#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26813#[cfg_attr(test, assert_instr(vpandq))]
26814pub unsafe fn _mm_maskz_and_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let and: i64x2 = simd_and(a.as_i64x2(), b.as_i64x2());
    let zero: i64x2 = _mm_setzero_si128().as_i64x2();
    transmute(simd_select_bitmask(k, and, zero))
26818}
26819
26820/// Compute the bitwise AND of 512 bits (representing integer data) in a and b, and store the result in dst.
26821///
26822/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_si512&expand=302)
26823#[inline]
26824#[target_feature(enable = "avx512f")]
26825#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26826#[cfg_attr(test, assert_instr(vpandq))]
26827pub unsafe fn _mm512_and_si512(a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_and(a.as_i32x16(), b.as_i32x16()))
26829}
26830
26831/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst.
26832///
26833/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_epi32&expand=4042)
26834#[inline]
26835#[target_feature(enable = "avx512f")]
26836#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26837#[cfg_attr(test, assert_instr(vporq))]
26838pub unsafe fn _mm512_or_epi32(a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_or(a.as_i32x16(), b.as_i32x16()))
26840}
26841
26842/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26843///
26844/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_or_epi32&expand=4040)
26845#[inline]
26846#[target_feature(enable = "avx512f")]
26847#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26848#[cfg_attr(test, assert_instr(vpord))]
26849pub unsafe fn _mm512_mask_or_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    let or: i32x16 = _mm512_or_epi32(a, b).as_i32x16();
    transmute(simd_select_bitmask(k, or, src.as_i32x16()))
26852}
26853
26854/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26855///
26856/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_or_epi32&expand=4041)
26857#[inline]
26858#[target_feature(enable = "avx512f")]
26859#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26860#[cfg_attr(test, assert_instr(vpord))]
26861pub unsafe fn _mm512_maskz_or_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    let or: i32x16 = _mm512_or_epi32(a, b).as_i32x16();
    let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, or, zero))
26865}
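
// Illustrative sketch (not part of this module's test suite): the masked OR
// forms follow the same writemask pattern as the AND family above. The helper
// name `sketch_mask_or_epi32` is hypothetical and assumes an AVX-512F CPU.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn sketch_mask_or_epi32() {
    let src = _mm512_setzero_si512();
    let a = _mm512_set1_epi32(0b0101);
    let b = _mm512_set1_epi32(0b0011);
    // Low 8 lanes get `a | b` (0b0111); high 8 lanes keep `src` (zero).
    let r = _mm512_mask_or_epi32(src, 0x00ff, a, b);
    assert_eq!(_mm512_mask_cmpeq_epi32_mask(0x00ff, r, _mm512_set1_epi32(0b0111)), 0x00ff);
    assert_eq!(_mm512_mask_cmpeq_epi32_mask(0xff00, r, src), 0xff00);
}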
26866
26867/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst.
26868///
26869/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_or_epi32&expand=4039)
26870#[inline]
26871#[target_feature(enable = "avx512f,avx512vl")]
26872#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26873#[cfg_attr(test, assert_instr(vor))] //should be vpord
26874pub unsafe fn _mm256_or_epi32(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_or(a.as_i32x8(), b.as_i32x8()))
26876}
26877
26878/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26879///
26880/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_or_epi32&expand=4037)
26881#[inline]
26882#[target_feature(enable = "avx512f,avx512vl")]
26883#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26884#[cfg_attr(test, assert_instr(vpord))]
26885pub unsafe fn _mm256_mask_or_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    let or: i32x8 = _mm256_or_epi32(a, b).as_i32x8();
    transmute(simd_select_bitmask(k, or, src.as_i32x8()))
26888}
26889
26890/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26891///
26892/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_or_epi32&expand=4038)
26893#[inline]
26894#[target_feature(enable = "avx512f,avx512vl")]
26895#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26896#[cfg_attr(test, assert_instr(vpord))]
26897pub unsafe fn _mm256_maskz_or_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    let or: i32x8 = _mm256_or_epi32(a, b).as_i32x8();
    let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
    transmute(simd_select_bitmask(k, or, zero))
26901}
26902
26903/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst.
26904///
26905/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_or_epi32&expand=4036)
26906#[inline]
26907#[target_feature(enable = "avx512f,avx512vl")]
26908#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26909#[cfg_attr(test, assert_instr(vor))] //should be vpord
26910pub unsafe fn _mm_or_epi32(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_or(a.as_i32x4(), b.as_i32x4()))
26912}
26913
26914/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26915///
26916/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_or_epi32&expand=4034)
26917#[inline]
26918#[target_feature(enable = "avx512f,avx512vl")]
26919#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26920#[cfg_attr(test, assert_instr(vpord))]
26921pub unsafe fn _mm_mask_or_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let or: i32x4 = _mm_or_epi32(a, b).as_i32x4();
    transmute(simd_select_bitmask(k, or, src.as_i32x4()))
26924}
26925
26926/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26927///
26928/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_or_epi32&expand=4035)
26929#[inline]
26930#[target_feature(enable = "avx512f,avx512vl")]
26931#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26932#[cfg_attr(test, assert_instr(vpord))]
26933pub unsafe fn _mm_maskz_or_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let or: i32x4 = _mm_or_epi32(a, b).as_i32x4();
    let zero: i32x4 = _mm_setzero_si128().as_i32x4();
    transmute(simd_select_bitmask(k, or, zero))
26937}
26938
/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst.
26940///
26941/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_epi64&expand=4051)
26942#[inline]
26943#[target_feature(enable = "avx512f")]
26944#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26945#[cfg_attr(test, assert_instr(vporq))]
26946pub unsafe fn _mm512_or_epi64(a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_or(a.as_i64x8(), b.as_i64x8()))
26948}
26949
26950/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26951///
26952/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_or_epi64&expand=4049)
26953#[inline]
26954#[target_feature(enable = "avx512f")]
26955#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26956#[cfg_attr(test, assert_instr(vporq))]
26957pub unsafe fn _mm512_mask_or_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let or: i64x8 = _mm512_or_epi64(a, b).as_i64x8();
    transmute(simd_select_bitmask(k, or, src.as_i64x8()))
26960}
26961
26962/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26963///
26964/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_or_epi64&expand=4050)
26965#[inline]
26966#[target_feature(enable = "avx512f")]
26967#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26968#[cfg_attr(test, assert_instr(vporq))]
26969pub unsafe fn _mm512_maskz_or_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let or: i64x8 = _mm512_or_epi64(a, b).as_i64x8();
    let zero: i64x8 = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, or, zero))
26973}
26974
/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst.
26976///
26977/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_or_epi64&expand=4048)
26978#[inline]
26979#[target_feature(enable = "avx512f,avx512vl")]
26980#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26981#[cfg_attr(test, assert_instr(vor))] //should be vporq
26982pub unsafe fn _mm256_or_epi64(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_or(a.as_i64x4(), b.as_i64x4()))
26984}
26985
26986/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26987///
26988/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_or_epi64&expand=4046)
26989#[inline]
26990#[target_feature(enable = "avx512f,avx512vl")]
26991#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26992#[cfg_attr(test, assert_instr(vporq))]
26993pub unsafe fn _mm256_mask_or_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    let or: i64x4 = _mm256_or_epi64(a, b).as_i64x4();
    transmute(simd_select_bitmask(k, or, src.as_i64x4()))
26996}
26997
26998/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26999///
27000/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_or_epi64&expand=4047)
27001#[inline]
27002#[target_feature(enable = "avx512f,avx512vl")]
27003#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27004#[cfg_attr(test, assert_instr(vporq))]
27005pub unsafe fn _mm256_maskz_or_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    let or: i64x4 = _mm256_or_epi64(a, b).as_i64x4();
    let zero: i64x4 = _mm256_setzero_si256().as_i64x4();
    transmute(simd_select_bitmask(k, or, zero))
27009}
27010
/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst.
27012///
27013/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_or_epi64&expand=4045)
27014#[inline]
27015#[target_feature(enable = "avx512f,avx512vl")]
27016#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27017#[cfg_attr(test, assert_instr(vor))] //should be vporq
27018pub unsafe fn _mm_or_epi64(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_or(a.as_i64x2(), b.as_i64x2()))
27020}
27021
27022/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27023///
27024/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_or_epi64&expand=4043)
27025#[inline]
27026#[target_feature(enable = "avx512f,avx512vl")]
27027#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27028#[cfg_attr(test, assert_instr(vporq))]
27029pub unsafe fn _mm_mask_or_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let or: i64x2 = _mm_or_epi64(a, b).as_i64x2();
    transmute(simd_select_bitmask(k, or, src.as_i64x2()))
27032}
27033
27034/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27035///
27036/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_or_epi64&expand=4044)
27037#[inline]
27038#[target_feature(enable = "avx512f,avx512vl")]
27039#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27040#[cfg_attr(test, assert_instr(vporq))]
27041pub unsafe fn _mm_maskz_or_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let or: i64x2 = _mm_or_epi64(a, b).as_i64x2();
    let zero: i64x2 = _mm_setzero_si128().as_i64x2();
    transmute(simd_select_bitmask(k, or, zero))
27045}
27046
27047/// Compute the bitwise OR of 512 bits (representing integer data) in a and b, and store the result in dst.
27048///
27049/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_si512&expand=4072)
27050#[inline]
27051#[target_feature(enable = "avx512f")]
27052#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27053#[cfg_attr(test, assert_instr(vporq))]
27054pub unsafe fn _mm512_or_si512(a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_or(a.as_i32x16(), b.as_i32x16()))
27056}
27057
27058/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst.
27059///
27060/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_epi32&expand=6142)
27061#[inline]
27062#[target_feature(enable = "avx512f")]
27063#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27064#[cfg_attr(test, assert_instr(vpxorq))] //should be vpxord
27065pub unsafe fn _mm512_xor_epi32(a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_xor(a.as_i32x16(), b.as_i32x16()))
27067}
27068
27069/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27070///
27071/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_xor_epi32&expand=6140)
27072#[inline]
27073#[target_feature(enable = "avx512f")]
27074#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27075#[cfg_attr(test, assert_instr(vpxord))]
27076pub unsafe fn _mm512_mask_xor_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    let xor: i32x16 = _mm512_xor_epi32(a, b).as_i32x16();
    transmute(simd_select_bitmask(k, xor, src.as_i32x16()))
27079}
27080
27081/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27082///
27083/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_xor_epi32&expand=6141)
27084#[inline]
27085#[target_feature(enable = "avx512f")]
27086#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27087#[cfg_attr(test, assert_instr(vpxord))]
27088pub unsafe fn _mm512_maskz_xor_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    let xor: i32x16 = _mm512_xor_epi32(a, b).as_i32x16();
    let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, xor, zero))
27092}
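
// Illustrative sketch (not part of this module's test suite): the zeromask XOR
// keeps `a ^ b` only in the selected lanes and forces every other lane to zero.
// The helper name `sketch_maskz_xor_epi32` is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn sketch_maskz_xor_epi32() {
    let a = _mm512_set1_epi32(0b1100);
    let b = _mm512_set1_epi32(0b1010);
    let r = _mm512_maskz_xor_epi32(0x000f, a, b);
    // Lanes 0..4 hold 0b1100 ^ 0b1010 = 0b0110; lanes 4..16 are zeroed.
    assert_eq!(_mm512_mask_cmpeq_epi32_mask(0x000f, r, _mm512_set1_epi32(0b0110)), 0x000f);
    assert_eq!(_mm512_cmpeq_epi32_mask(r, _mm512_setzero_si512()), 0xfff0);
}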
27093
27094/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst.
27095///
27096/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_xor_epi32&expand=6139)
27097#[inline]
27098#[target_feature(enable = "avx512f,avx512vl")]
27099#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27100#[cfg_attr(test, assert_instr(vxor))] //should be vpxord
27101pub unsafe fn _mm256_xor_epi32(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_xor(a.as_i32x8(), b.as_i32x8()))
27103}
27104
27105/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27106///
27107/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_xor_epi32&expand=6137)
27108#[inline]
27109#[target_feature(enable = "avx512f,avx512vl")]
27110#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27111#[cfg_attr(test, assert_instr(vpxord))]
27112pub unsafe fn _mm256_mask_xor_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    let xor: i32x8 = _mm256_xor_epi32(a, b).as_i32x8();
    transmute(simd_select_bitmask(k, xor, src.as_i32x8()))
27115}
27116
27117/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27118///
27119/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_xor_epi32&expand=6138)
27120#[inline]
27121#[target_feature(enable = "avx512f,avx512vl")]
27122#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27123#[cfg_attr(test, assert_instr(vpxord))]
27124pub unsafe fn _mm256_maskz_xor_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    let xor: i32x8 = _mm256_xor_epi32(a, b).as_i32x8();
    let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
    transmute(simd_select_bitmask(k, xor, zero))
27128}
27129
27130/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst.
27131///
27132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_xor_epi32&expand=6136)
27133#[inline]
27134#[target_feature(enable = "avx512f,avx512vl")]
27135#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27136#[cfg_attr(test, assert_instr(vxor))] //should be vpxord
27137pub unsafe fn _mm_xor_epi32(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_xor(a.as_i32x4(), b.as_i32x4()))
27139}
27140
27141/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27142///
27143/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_xor_epi32&expand=6134)
27144#[inline]
27145#[target_feature(enable = "avx512f,avx512vl")]
27146#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27147#[cfg_attr(test, assert_instr(vpxord))]
27148pub unsafe fn _mm_mask_xor_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let xor: i32x4 = _mm_xor_epi32(a, b).as_i32x4();
    transmute(simd_select_bitmask(k, xor, src.as_i32x4()))
27151}
27152
27153/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27154///
27155/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_xor_epi32&expand=6135)
27156#[inline]
27157#[target_feature(enable = "avx512f,avx512vl")]
27158#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27159#[cfg_attr(test, assert_instr(vpxord))]
27160pub unsafe fn _mm_maskz_xor_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let xor: i32x4 = _mm_xor_epi32(a, b).as_i32x4();
    let zero: i32x4 = _mm_setzero_si128().as_i32x4();
    transmute(simd_select_bitmask(k, xor, zero))
27164}
27165
27166/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst.
27167///
27168/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_epi64&expand=6151)
27169#[inline]
27170#[target_feature(enable = "avx512f")]
27171#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27172#[cfg_attr(test, assert_instr(vpxorq))]
27173pub unsafe fn _mm512_xor_epi64(a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_xor(a.as_i64x8(), b.as_i64x8()))
27175}
27176
27177/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27178///
27179/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_xor_epi64&expand=6149)
27180#[inline]
27181#[target_feature(enable = "avx512f")]
27182#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27183#[cfg_attr(test, assert_instr(vpxorq))]
27184pub unsafe fn _mm512_mask_xor_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let xor: i64x8 = _mm512_xor_epi64(a, b).as_i64x8();
    transmute(simd_select_bitmask(k, xor, src.as_i64x8()))
27187}
27188
27189/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27190///
27191/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_xor_epi64&expand=6150)
27192#[inline]
27193#[target_feature(enable = "avx512f")]
27194#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27195#[cfg_attr(test, assert_instr(vpxorq))]
27196pub unsafe fn _mm512_maskz_xor_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let xor: i64x8 = _mm512_xor_epi64(a, b).as_i64x8();
    let zero: i64x8 = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, xor, zero))
27200}
27201
27202/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst.
27203///
27204/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_xor_epi64&expand=6148)
27205#[inline]
27206#[target_feature(enable = "avx512f,avx512vl")]
27207#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27208#[cfg_attr(test, assert_instr(vxor))] //should be vpxorq
27209pub unsafe fn _mm256_xor_epi64(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_xor(a.as_i64x4(), b.as_i64x4()))
27211}
27212
27213/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27214///
27215/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_xor_epi64&expand=6146)
27216#[inline]
27217#[target_feature(enable = "avx512f,avx512vl")]
27218#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27219#[cfg_attr(test, assert_instr(vpxorq))]
27220pub unsafe fn _mm256_mask_xor_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    let xor: i64x4 = _mm256_xor_epi64(a, b).as_i64x4();
    transmute(simd_select_bitmask(k, xor, src.as_i64x4()))
27223}
27224
27225/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27226///
27227/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_xor_epi64&expand=6147)
27228#[inline]
27229#[target_feature(enable = "avx512f,avx512vl")]
27230#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27231#[cfg_attr(test, assert_instr(vpxorq))]
27232pub unsafe fn _mm256_maskz_xor_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    let xor: i64x4 = _mm256_xor_epi64(a, b).as_i64x4();
    let zero: i64x4 = _mm256_setzero_si256().as_i64x4();
    transmute(simd_select_bitmask(k, xor, zero))
27236}
27237
27238/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst.
27239///
27240/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_xor_epi64&expand=6145)
27241#[inline]
27242#[target_feature(enable = "avx512f,avx512vl")]
27243#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27244#[cfg_attr(test, assert_instr(vxor))] //should be vpxorq
27245pub unsafe fn _mm_xor_epi64(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_xor(a.as_i64x2(), b.as_i64x2()))
27247}
27248
27249/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27250///
27251/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_xor_epi64&expand=6143)
27252#[inline]
27253#[target_feature(enable = "avx512f,avx512vl")]
27254#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27255#[cfg_attr(test, assert_instr(vpxorq))]
27256pub unsafe fn _mm_mask_xor_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let xor: i64x2 = _mm_xor_epi64(a, b).as_i64x2();
    transmute(simd_select_bitmask(k, xor, src.as_i64x2()))
27259}
27260
27261/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27262///
27263/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_xor_epi64&expand=6144)
27264#[inline]
27265#[target_feature(enable = "avx512f,avx512vl")]
27266#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27267#[cfg_attr(test, assert_instr(vpxorq))]
27268pub unsafe fn _mm_maskz_xor_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let xor: i64x2 = _mm_xor_epi64(a, b).as_i64x2();
    let zero: i64x2 = _mm_setzero_si128().as_i64x2();
    transmute(simd_select_bitmask(k, xor, zero))
27272}
27273
27274/// Compute the bitwise XOR of 512 bits (representing integer data) in a and b, and store the result in dst.
27275///
27276/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_si512&expand=6172)
27277#[inline]
27278#[target_feature(enable = "avx512f")]
27279#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27280#[cfg_attr(test, assert_instr(vpxorq))]
27281pub unsafe fn _mm512_xor_si512(a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_xor(a.as_i32x16(), b.as_i32x16()))
27283}
27284
27285/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst.
27286///
27287/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_epi32&expand=310)
27288#[inline]
27289#[target_feature(enable = "avx512f")]
27290#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27291#[cfg_attr(test, assert_instr(vpandnq))] //should be vpandnd
27292pub unsafe fn _mm512_andnot_epi32(a: __m512i, b: __m512i) -> __m512i {
    _mm512_and_epi32(_mm512_xor_epi32(a, _mm512_set1_epi32(u32::MAX as i32)), b)
27294}
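
// Illustrative sketch (not part of this module's test suite): the NOT applies
// to `a`, not `b`, so the result is `(!a) & b` -- the usual "clear in b the
// bits that are set in a" pattern. The helper name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn sketch_andnot_epi32() {
    let a = _mm512_set1_epi32(0b1100); // bits to clear
    let b = _mm512_set1_epi32(0b1010);
    let r = _mm512_andnot_epi32(a, b);
    assert_eq!(_mm512_cmpeq_epi32_mask(r, _mm512_set1_epi32(0b0010)), 0xffff);
}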
27295
27296/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27297///
27298/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_andnot_epi32&expand=311)
27299#[inline]
27300#[target_feature(enable = "avx512f")]
27301#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27302#[cfg_attr(test, assert_instr(vpandnd))]
27303pub unsafe fn _mm512_mask_andnot_epi32(
27304 src: __m512i,
27305 k: __mmask16,
27306 a: __m512i,
27307 b: __m512i,
27308) -> __m512i {
    let andnot: i32x16 = _mm512_andnot_epi32(a, b).as_i32x16();
    transmute(simd_select_bitmask(k, andnot, src.as_i32x16()))
27311}
27312
27313/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27314///
27315/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_andnot_epi32&expand=312)
27316#[inline]
27317#[target_feature(enable = "avx512f")]
27318#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27319#[cfg_attr(test, assert_instr(vpandnd))]
27320pub unsafe fn _mm512_maskz_andnot_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    let andnot: i32x16 = _mm512_andnot_epi32(a, b).as_i32x16();
    let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, andnot, zero))
27324}
27325
27326/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27327///
27328/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_andnot_epi32&expand=308)
27329#[inline]
27330#[target_feature(enable = "avx512f,avx512vl")]
27331#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27332#[cfg_attr(test, assert_instr(vpandnd))]
27333pub unsafe fn _mm256_mask_andnot_epi32(
27334 src: __m256i,
27335 k: __mmask8,
27336 a: __m256i,
27337 b: __m256i,
27338) -> __m256i {
    let not: __m256i = _mm256_xor_epi32(a, _mm256_set1_epi32(u32::MAX as i32));
    let andnot: i32x8 = simd_and(not.as_i32x8(), b.as_i32x8());
    transmute(simd_select_bitmask(k, andnot, src.as_i32x8()))
27342}
27343
27344/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27345///
27346/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_andnot_epi32&expand=309)
27347#[inline]
27348#[target_feature(enable = "avx512f,avx512vl")]
27349#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27350#[cfg_attr(test, assert_instr(vpandnd))]
27351pub unsafe fn _mm256_maskz_andnot_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    let not: __m256i = _mm256_xor_epi32(a, _mm256_set1_epi32(u32::MAX as i32));
    let andnot: i32x8 = simd_and(not.as_i32x8(), b.as_i32x8());
    let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
    transmute(simd_select_bitmask(k, andnot, zero))
27356}
27357
27358/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27359///
27360/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_andnot_epi32&expand=306)
27361#[inline]
27362#[target_feature(enable = "avx512f,avx512vl")]
27363#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27364#[cfg_attr(test, assert_instr(vpandnd))]
27365pub unsafe fn _mm_mask_andnot_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let not: __m128i = _mm_xor_epi32(a, _mm_set1_epi32(u32::MAX as i32));
    let andnot: i32x4 = simd_and(not.as_i32x4(), b.as_i32x4());
    transmute(simd_select_bitmask(k, andnot, src.as_i32x4()))
27369}
27370
27371/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27372///
27373/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_andnot_epi32&expand=307)
27374#[inline]
27375#[target_feature(enable = "avx512f,avx512vl")]
27376#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27377#[cfg_attr(test, assert_instr(vpandnd))]
27378pub unsafe fn _mm_maskz_andnot_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let not: __m128i = _mm_xor_epi32(a, _mm_set1_epi32(u32::MAX as i32));
    let andnot: i32x4 = simd_and(not.as_i32x4(), b.as_i32x4());
    let zero: i32x4 = _mm_setzero_si128().as_i32x4();
    transmute(simd_select_bitmask(k, andnot, zero))
27383}
27384
27385/// Compute the bitwise NOT of 512 bits (composed of packed 64-bit integers) in a and then AND with b, and store the results in dst.
27386///
27387/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_epi64&expand=317)
27388#[inline]
27389#[target_feature(enable = "avx512f")]
27390#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpandnq))]
27392pub unsafe fn _mm512_andnot_epi64(a: __m512i, b: __m512i) -> __m512i {
    _mm512_and_epi64(_mm512_xor_epi64(a, _mm512_set1_epi64(u64::MAX as i64)), b)
27394}
27395
27396/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27397///
27398/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_andnot_epi64&expand=318)
27399#[inline]
27400#[target_feature(enable = "avx512f")]
27401#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27402#[cfg_attr(test, assert_instr(vpandnq))]
27403pub unsafe fn _mm512_mask_andnot_epi64(
27404 src: __m512i,
27405 k: __mmask8,
27406 a: __m512i,
27407 b: __m512i,
27408) -> __m512i {
    let andnot: i64x8 = _mm512_andnot_epi64(a, b).as_i64x8();
    transmute(simd_select_bitmask(k, andnot, src.as_i64x8()))
27411}
27412
27413/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27414///
27415/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_andnot_epi64&expand=319)
27416#[inline]
27417#[target_feature(enable = "avx512f")]
27418#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27419#[cfg_attr(test, assert_instr(vpandnq))]
27420pub unsafe fn _mm512_maskz_andnot_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let andnot: i64x8 = _mm512_andnot_epi64(a, b).as_i64x8();
    let zero: i64x8 = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, andnot, zero))
27424}
27425
27426/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27427///
27428/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_andnot_epi64&expand=315)
27429#[inline]
27430#[target_feature(enable = "avx512f,avx512vl")]
27431#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27432#[cfg_attr(test, assert_instr(vpandnq))]
27433pub unsafe fn _mm256_mask_andnot_epi64(
27434 src: __m256i,
27435 k: __mmask8,
27436 a: __m256i,
27437 b: __m256i,
27438) -> __m256i {
    let not: __m256i = _mm256_xor_epi64(a, _mm256_set1_epi64x(u64::MAX as i64));
    let andnot: i64x4 = simd_and(not.as_i64x4(), b.as_i64x4());
    transmute(simd_select_bitmask(k, andnot, src.as_i64x4()))
27442}
27443
27444/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27445///
27446/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_andnot_epi64&expand=316)
27447#[inline]
27448#[target_feature(enable = "avx512f,avx512vl")]
27449#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27450#[cfg_attr(test, assert_instr(vpandnq))]
27451pub unsafe fn _mm256_maskz_andnot_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    let not: __m256i = _mm256_xor_epi64(a, _mm256_set1_epi64x(u64::MAX as i64));
    let andnot: i64x4 = simd_and(not.as_i64x4(), b.as_i64x4());
    let zero: i64x4 = _mm256_setzero_si256().as_i64x4();
    transmute(simd_select_bitmask(k, andnot, zero))
27456}
27457
27458/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27459///
27460/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_andnot_epi64&expand=313)
27461#[inline]
27462#[target_feature(enable = "avx512f,avx512vl")]
27463#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27464#[cfg_attr(test, assert_instr(vpandnq))]
27465pub unsafe fn _mm_mask_andnot_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let not: __m128i = _mm_xor_epi64(a, _mm_set1_epi64x(u64::MAX as i64));
    let andnot: i64x2 = simd_and(not.as_i64x2(), b.as_i64x2());
    transmute(simd_select_bitmask(k, andnot, src.as_i64x2()))
27469}
27470
27471/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27472///
27473/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_andnot_epi64&expand=314)
27474#[inline]
27475#[target_feature(enable = "avx512f,avx512vl")]
27476#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27477#[cfg_attr(test, assert_instr(vpandnq))]
27478pub unsafe fn _mm_maskz_andnot_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let not: __m128i = _mm_xor_epi64(a, _mm_set1_epi64x(u64::MAX as i64));
    let andnot: i64x2 = simd_and(not.as_i64x2(), b.as_i64x2());
    let zero: i64x2 = _mm_setzero_si128().as_i64x2();
    transmute(simd_select_bitmask(k, andnot, zero))
27483}
27484
27485/// Compute the bitwise NOT of 512 bits (representing integer data) in a and then AND with b, and store the result in dst.
27486///
27487/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_si512&expand=340)
27488#[inline]
27489#[target_feature(enable = "avx512f")]
27490#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27491#[cfg_attr(test, assert_instr(vpandnq))]
27492pub unsafe fn _mm512_andnot_si512(a: __m512i, b: __m512i) -> __m512i {
    _mm512_and_epi64(_mm512_xor_epi64(a, _mm512_set1_epi64(u64::MAX as i64)), b)
27494}
27495
27496/// Compute the bitwise AND of 16-bit masks a and b, and store the result in k.
27497///
27498/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kand_mask16&expand=3212)
27499#[inline]
27500#[target_feature(enable = "avx512f")]
27501#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27502#[cfg_attr(test, assert_instr(and))] // generate normal and code instead of kandw
27503pub unsafe fn _kand_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
27504 a & b
27505}
27506
27507/// Compute the bitwise AND of 16-bit masks a and b, and store the result in k.
27508///
27509/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=512_kand&expand=3210)
27510#[inline]
27511#[target_feature(enable = "avx512f")]
27512#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27513#[cfg_attr(test, assert_instr(and))] // generate normal and code instead of kandw
27514pub unsafe fn _mm512_kand(a: __mmask16, b: __mmask16) -> __mmask16 {
27515 a & b
27516}
27517
27518/// Compute the bitwise OR of 16-bit masks a and b, and store the result in k.
27519///
27520/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kor_mask16&expand=3239)
27521#[inline]
27522#[target_feature(enable = "avx512f")]
27523#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27524#[cfg_attr(test, assert_instr(or))] // generate normal or code instead of korw
27525pub unsafe fn _kor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
27526 a | b
27527}
27528
27529/// Compute the bitwise OR of 16-bit masks a and b, and store the result in k.
27530///
27531/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=512_kor&expand=3237)
27532#[inline]
27533#[target_feature(enable = "avx512f")]
27534#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27535#[cfg_attr(test, assert_instr(or))] // generate normal or code instead of korw
27536pub unsafe fn _mm512_kor(a: __mmask16, b: __mmask16) -> __mmask16 {
27537 a | b
27538}
27539
27540/// Compute the bitwise XOR of 16-bit masks a and b, and store the result in k.
27541///
27542/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kxor_mask16&expand=3291)
27543#[inline]
27544#[target_feature(enable = "avx512f")]
27545#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27546#[cfg_attr(test, assert_instr(xor))] // generate normal xor code instead of kxorw
27547pub unsafe fn _kxor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
27548 a ^ b
27549}
27550
27551/// Compute the bitwise XOR of 16-bit masks a and b, and store the result in k.
27552///
27553/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=512_kxor&expand=3289)
27554#[inline]
27555#[target_feature(enable = "avx512f")]
27556#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27557#[cfg_attr(test, assert_instr(xor))] // generate normal xor code instead of kxorw
27558pub unsafe fn _mm512_kxor(a: __mmask16, b: __mmask16) -> __mmask16 {
27559 a ^ b
27560}
27561
27562/// Compute the bitwise NOT of 16-bit mask a, and store the result in k.
27563///
27564/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=knot_mask16&expand=3233)
27565#[inline]
27566#[target_feature(enable = "avx512f")]
27567#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27568pub unsafe fn _knot_mask16(a: __mmask16) -> __mmask16 {
27569 a ^ 0b11111111_11111111
27570}
27571
27572/// Compute the bitwise NOT of 16-bit mask a, and store the result in k.
27573///
27574/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=512_knot&expand=3231)
27575#[inline]
27576#[target_feature(enable = "avx512f")]
27577#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27578pub unsafe fn _mm512_knot(a: __mmask16) -> __mmask16 {
27579 a ^ 0b11111111_11111111
27580}
27581
27582/// Compute the bitwise NOT of 16-bit masks a and then AND with b, and store the result in k.
27583///
27584/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kandn_mask16&expand=3218)
27585#[inline]
27586#[target_feature(enable = "avx512f")]
27587#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27588#[cfg_attr(test, assert_instr(not))] // generate normal and, not code instead of kandnw
27589pub unsafe fn _kandn_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
    _mm512_kand(_mm512_knot(a), b)
27591}
27592
27593/// Compute the bitwise NOT of 16-bit masks a and then AND with b, and store the result in k.
27594///
27595/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=512_kandn&expand=3216)
27596#[inline]
27597#[target_feature(enable = "avx512f")]
27598#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(not))] // generate normal and, not code instead of kandnw
27600pub unsafe fn _mm512_kandn(a: __mmask16, b: __mmask16) -> __mmask16 {
    _mm512_kand(_mm512_knot(a), b)
27602}
27603
27604/// Compute the bitwise XNOR of 16-bit masks a and b, and store the result in k.
27605///
27606/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kxnor_mask16&expand=3285)
27607#[inline]
27608#[target_feature(enable = "avx512f")]
27609#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27610#[cfg_attr(test, assert_instr(xor))] // generate normal xor, not code instead of kxnorw
27611pub unsafe fn _kxnor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
27612 _mm512_knot(_mm512_kxor(a, b))
27613}
27614
27615/// Compute the bitwise XNOR of 16-bit masks a and b, and store the result in k.
27616///
27617/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=512_kxnor&expand=3283)
27618#[inline]
27619#[target_feature(enable = "avx512f")]
27620#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(xor))] // generate normal xor, not code instead of kxnorw
27622pub unsafe fn _mm512_kxnor(a: __mmask16, b: __mmask16) -> __mmask16 {
27623 _mm512_knot(_mm512_kxor(a, b))
27624}
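
// Illustrative sketch (not part of this module's test suite): the mask
// intrinsics above are plain 16-bit boolean algebra on `__mmask16`, so they
// can be checked against the ordinary integer operators. The helper name is
// hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn sketch_mask16_ops() {
    let a: __mmask16 = 0b1111_0000_1111_0000;
    let b: __mmask16 = 0b1010_1010_1010_1010;
    assert_eq!(_mm512_kand(a, b), a & b);
    assert_eq!(_mm512_kor(a, b), a | b);
    assert_eq!(_mm512_kxor(a, b), a ^ b);
    assert_eq!(_mm512_knot(a), !a);
    assert_eq!(_mm512_kandn(a, b), !a & b);
    assert_eq!(_mm512_kxnor(a, b), !(a ^ b));
}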
27625
27626/// Copy 16-bit mask a to k.
27627///
27628/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm512_kmov&expand=3228)
27629#[inline]
27630#[target_feature(enable = "avx512f")]
27631#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(mov))] // generate normal mov code instead of kmovw
27633pub unsafe fn _mm512_kmov(a: __mmask16) -> __mmask16 {
27634 a
27635}
27636
27637/// Converts integer mask into bitmask, storing the result in dst.
27638///
27639/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=512_int2mask&expand=3189)
27640#[inline]
#[target_feature(enable = "avx512f")]
27642#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27643pub unsafe fn _mm512_int2mask(mask: i32) -> __mmask16 {
27644 mask as u16
27645}
27646
/// Converts bit mask k1 into an integer value, storing the result in dst.
27648///
27649/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=512_mask2int&expand=3544)
27650#[inline]
27651#[target_feature(enable = "avx512f")]
27652#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(mov))] // generate normal mov code instead of kmovw
27654pub unsafe fn _mm512_mask2int(k1: __mmask16) -> i32 {
27655 k1 as i32
27656}
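
// Illustrative sketch (not part of this module's test suite): `_mm512_int2mask`
// truncates the integer to its low 16 bits and `_mm512_mask2int` zero-extends
// the mask back. The helper name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn sketch_mask_int_roundtrip() {
    let k = _mm512_int2mask(0x1_0001); // bit 16 is discarded
    assert_eq!(k, 0x0001);
    assert_eq!(_mm512_mask2int(k), 1);
}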
27657
27658/// Unpack and interleave 8 bits from masks a and b, and store the 16-bit result in k.
27659///
27660/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=512_kunpackb&expand=3280)
27661#[inline]
27662#[target_feature(enable = "avx512f")]
27663#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(mov))] // generate normal mov code instead of kunpckbw
27665pub unsafe fn _mm512_kunpackb(a: __mmask16, b: __mmask16) -> __mmask16 {
27666 let a: u16 = a & 0b00000000_11111111;
27667 let b: u16 = b & 0b11111111_00000000;
27668 a | b
27669}
27670
/// Performs bitwise OR between masks a and b, storing the result in dst. CF flag is set if dst consists of all 1's.
27672///
27673/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=512_kortestc&expand=3247)
27674#[inline]
27675#[target_feature(enable = "avx512f")]
27676#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(cmp))] // generate normal cmp code instead of kortestw
27678pub unsafe fn _mm512_kortestc(a: __mmask16, b: __mmask16) -> i32 {
27679 let r: u16 = a | b;
27680 if r == 0b11111111_11111111 {
27681 1
27682 } else {
27683 0
27684 }
27685}
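
// Illustrative sketch (not part of this module's test suite): `_mm512_kortestc`
// returns 1 exactly when the OR of the two masks covers all 16 bits, matching
// the carry-flag behaviour of `kortestw`. The helper name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn sketch_kortestc() {
    assert_eq!(_mm512_kortestc(0xff00, 0x00ff), 1);
    assert_eq!(_mm512_kortestc(0xff00, 0x000f), 0);
}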
27686
27687/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
27688///
27689/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_test_epi32_mask&expand=5890)
27690#[inline]
27691#[target_feature(enable = "avx512f")]
27692#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27693#[cfg_attr(test, assert_instr(vptestmd))]
27694pub unsafe fn _mm512_test_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
27695 let and: __m512i = _mm512_and_epi32(a, b);
27696 let zero: __m512i = _mm512_setzero_si512();
    _mm512_cmpneq_epi32_mask(and, zero)
27698}
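
// Illustrative sketch (not part of this module's test suite): the bit for lane
// i is set when `a[i] & b[i]` is non-zero, i.e. when the two lanes share at
// least one set bit. The helper name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn sketch_test_epi32_mask() {
    // Lane 0 shares bit 0 with `b`; lane 1 (0b0100) and the zeroed lanes do not.
    let a = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0b0100, 0b0011);
    let b = _mm512_set1_epi32(0b0001);
    assert_eq!(_mm512_test_epi32_mask(a, b), 0b0000_0000_0000_0001);
}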
27699
27700/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
27701///
27702/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_test_epi32_mask&expand=5889)
27703#[inline]
27704#[target_feature(enable = "avx512f")]
27705#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27706#[cfg_attr(test, assert_instr(vptestmd))]
27707pub unsafe fn _mm512_mask_test_epi32_mask(k: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
27708 let and: __m512i = _mm512_and_epi32(a, b);
27709 let zero: __m512i = _mm512_setzero_si512();
    _mm512_mask_cmpneq_epi32_mask(k, and, zero)
27711}
27712
27713/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
27714///
27715/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_test_epi32_mask&expand=5888)
27716#[inline]
27717#[target_feature(enable = "avx512f,avx512vl")]
27718#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27719#[cfg_attr(test, assert_instr(vptestmd))]
27720pub unsafe fn _mm256_test_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
27721 let and: __m256i = _mm256_and_si256(a, b);
27722 let zero: __m256i = _mm256_setzero_si256();
    _mm256_cmpneq_epi32_mask(and, zero)
27724}
27725
27726/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
27727///
27728/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_test_epi32_mask&expand=5887)
27729#[inline]
27730#[target_feature(enable = "avx512f,avx512vl")]
27731#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27732#[cfg_attr(test, assert_instr(vptestmd))]
27733pub unsafe fn _mm256_mask_test_epi32_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
27734 let and: __m256i = _mm256_and_si256(a, b);
27735 let zero: __m256i = _mm256_setzero_si256();
    _mm256_mask_cmpneq_epi32_mask(k, and, zero)
27737}
27738
27739/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
27740///
27741/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_epi32_mask&expand=5886)
27742#[inline]
27743#[target_feature(enable = "avx512f,avx512vl")]
27744#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27745#[cfg_attr(test, assert_instr(vptestmd))]
27746pub unsafe fn _mm_test_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
27747 let and: __m128i = _mm_and_si128(a, b);
27748 let zero: __m128i = _mm_setzero_si128();
    _mm_cmpneq_epi32_mask(and, zero)
27750}
27751
27752/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
27753///
27754/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_test_epi32_mask&expand=5885)
27755#[inline]
27756#[target_feature(enable = "avx512f,avx512vl")]
27757#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27758#[cfg_attr(test, assert_instr(vptestmd))]
27759pub unsafe fn _mm_mask_test_epi32_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
27760 let and: __m128i = _mm_and_si128(a, b);
27761 let zero: __m128i = _mm_setzero_si128();
    _mm_mask_cmpneq_epi32_mask(k, and, zero)
27763}
27764
27765/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
27766///
27767/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_test_epi64_mask&expand=5896)
27768#[inline]
27769#[target_feature(enable = "avx512f")]
27770#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27771#[cfg_attr(test, assert_instr(vptestmq))]
27772pub unsafe fn _mm512_test_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
27773 let and: __m512i = _mm512_and_epi64(a, b);
27774 let zero: __m512i = _mm512_setzero_si512();
    _mm512_cmpneq_epi64_mask(and, zero)
27776}
27777
27778/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
27779///
27780/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_test_epi64_mask&expand=5895)
27781#[inline]
27782#[target_feature(enable = "avx512f")]
27783#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27784#[cfg_attr(test, assert_instr(vptestmq))]
27785pub unsafe fn _mm512_mask_test_epi64_mask(k: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
27786 let and: __m512i = _mm512_and_epi64(a, b);
27787 let zero: __m512i = _mm512_setzero_si512();
    _mm512_mask_cmpneq_epi64_mask(k, and, zero)
27789}
27790
27791/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
27792///
27793/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_test_epi64_mask&expand=5894)
27794#[inline]
27795#[target_feature(enable = "avx512f,avx512vl")]
27796#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27797#[cfg_attr(test, assert_instr(vptestmq))]
27798pub unsafe fn _mm256_test_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
27799 let and: __m256i = _mm256_and_si256(a, b);
27800 let zero: __m256i = _mm256_setzero_si256();
    _mm256_cmpneq_epi64_mask(and, zero)
27802}
27803
27804/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
27805///
27806/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_test_epi64_mask&expand=5893)
27807#[inline]
27808#[target_feature(enable = "avx512f,avx512vl")]
27809#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27810#[cfg_attr(test, assert_instr(vptestmq))]
27811pub unsafe fn _mm256_mask_test_epi64_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
27812 let and: __m256i = _mm256_and_si256(a, b);
27813 let zero: __m256i = _mm256_setzero_si256();
    _mm256_mask_cmpneq_epi64_mask(k, and, zero)
27815}
27816
27817/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
27818///
27819/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_epi64_mask&expand=5892)
27820#[inline]
27821#[target_feature(enable = "avx512f,avx512vl")]
27822#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27823#[cfg_attr(test, assert_instr(vptestmq))]
27824pub unsafe fn _mm_test_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
27825 let and: __m128i = _mm_and_si128(a, b);
27826 let zero: __m128i = _mm_setzero_si128();
    _mm_cmpneq_epi64_mask(and, zero)
27828}
27829
27830/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
27831///
27832/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_test_epi64_mask&expand=5891)
27833#[inline]
27834#[target_feature(enable = "avx512f,avx512vl")]
27835#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27836#[cfg_attr(test, assert_instr(vptestmq))]
27837pub unsafe fn _mm_mask_test_epi64_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
27838 let and: __m128i = _mm_and_si128(a, b);
27839 let zero: __m128i = _mm_setzero_si128();
    _mm_mask_cmpneq_epi64_mask(k, and, zero)
27841}
27842
27843/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
27844///
27845/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_testn_epi32_mask&expand=5921)
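///
/// # Examples
///
/// A short sketch of the "test for zero AND" behaviour (`ignore`d: requires a
/// nightly toolchain with `feature(stdarch_x86_avx512)` and AVX-512F hardware):
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     unsafe {
///         let a = _mm512_set1_epi32(0b0101);
///         let b = _mm512_set1_epi32(0b1010);
///         // The operands share no set bits, so a & b == 0 in every lane and
///         // all 16 mask bits are set.
///         assert_eq!(_mm512_testn_epi32_mask(a, b), 0xFFFF);
///         // A lane whose AND is non-zero clears its mask bit.
///         assert_eq!(_mm512_testn_epi32_mask(a, a), 0);
///     }
/// }
/// ```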
27846#[inline]
27847#[target_feature(enable = "avx512f")]
27848#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27849#[cfg_attr(test, assert_instr(vptestnmd))]
27850pub unsafe fn _mm512_testn_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
27851 let and: __m512i = _mm512_and_epi32(a, b);
27852 let zero: __m512i = _mm512_setzero_si512();
    _mm512_cmpeq_epi32_mask(and, zero)
27854}
27855
27856/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
27857///
27858/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_testn_epi32_mask&expand=5920)
27859#[inline]
27860#[target_feature(enable = "avx512f")]
27861#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27862#[cfg_attr(test, assert_instr(vptestnmd))]
27863pub unsafe fn _mm512_mask_testn_epi32_mask(k: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
27864 let and: __m512i = _mm512_and_epi32(a, b);
27865 let zero: __m512i = _mm512_setzero_si512();
    _mm512_mask_cmpeq_epi32_mask(k, and, zero)
27867}
27868
27869/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
27870///
27871/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testn_epi32_mask&expand=5919)
27872#[inline]
27873#[target_feature(enable = "avx512f,avx512vl")]
27874#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27875#[cfg_attr(test, assert_instr(vptestnmd))]
27876pub unsafe fn _mm256_testn_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
27877 let and: __m256i = _mm256_and_si256(a, b);
27878 let zero: __m256i = _mm256_setzero_si256();
    _mm256_cmpeq_epi32_mask(and, zero)
27880}
27881
27882/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
27883///
27884/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_testn_epi32_mask&expand=5918)
27885#[inline]
27886#[target_feature(enable = "avx512f,avx512vl")]
27887#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27888#[cfg_attr(test, assert_instr(vptestnmd))]
27889pub unsafe fn _mm256_mask_testn_epi32_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
27890 let and: __m256i = _mm256_and_si256(a, b);
27891 let zero: __m256i = _mm256_setzero_si256();
    _mm256_mask_cmpeq_epi32_mask(k, and, zero)
27893}
27894
27895/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
27896///
27897/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testn_epi32_mask&expand=5917)
27898#[inline]
27899#[target_feature(enable = "avx512f,avx512vl")]
27900#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27901#[cfg_attr(test, assert_instr(vptestnmd))]
27902pub unsafe fn _mm_testn_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
27903 let and: __m128i = _mm_and_si128(a, b);
27904 let zero: __m128i = _mm_setzero_si128();
    _mm_cmpeq_epi32_mask(and, zero)
27906}
27907
27908/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
27909///
27910/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_testn_epi32_mask&expand=5916)
27911#[inline]
27912#[target_feature(enable = "avx512f,avx512vl")]
27913#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27914#[cfg_attr(test, assert_instr(vptestnmd))]
27915pub unsafe fn _mm_mask_testn_epi32_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
27916 let and: __m128i = _mm_and_si128(a, b);
27917 let zero: __m128i = _mm_setzero_si128();
    _mm_mask_cmpeq_epi32_mask(k, and, zero)
27919}
27920
27921/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
27922///
27923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_testn_epi64_mask&expand=5927)
27924#[inline]
27925#[target_feature(enable = "avx512f")]
27926#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27927#[cfg_attr(test, assert_instr(vptestnmq))]
27928pub unsafe fn _mm512_testn_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
27929 let and: __m512i = _mm512_and_epi64(a, b);
27930 let zero: __m512i = _mm512_setzero_si512();
    _mm512_cmpeq_epi64_mask(and, zero)
27932}
27933
27934/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
27935///
27936/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_testn_epi64_mask&expand=5926)
27937#[inline]
27938#[target_feature(enable = "avx512f")]
27939#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27940#[cfg_attr(test, assert_instr(vptestnmq))]
27941pub unsafe fn _mm512_mask_testn_epi64_mask(k: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
27942 let and: __m512i = _mm512_and_epi64(a, b);
27943 let zero: __m512i = _mm512_setzero_si512();
    _mm512_mask_cmpeq_epi64_mask(k, and, zero)
27945}
27946
27947/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
27948///
27949/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testn_epi64_mask&expand=5925)
27950#[inline]
27951#[target_feature(enable = "avx512f,avx512vl")]
27952#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27953#[cfg_attr(test, assert_instr(vptestnmq))]
27954pub unsafe fn _mm256_testn_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
27955 let and: __m256i = _mm256_and_si256(a, b);
27956 let zero: __m256i = _mm256_setzero_si256();
    _mm256_cmpeq_epi64_mask(and, zero)
27958}
27959
27960/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
27961///
27962/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_testn_epi64_mask&expand=5924)
27963#[inline]
27964#[target_feature(enable = "avx512f,avx512vl")]
27965#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27966#[cfg_attr(test, assert_instr(vptestnmq))]
27967pub unsafe fn _mm256_mask_testn_epi64_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
27968 let and: __m256i = _mm256_and_si256(a, b);
27969 let zero: __m256i = _mm256_setzero_si256();
    _mm256_mask_cmpeq_epi64_mask(k, and, zero)
27971}
27972
27973/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
27974///
27975/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testn_epi64_mask&expand=5923)
27976#[inline]
27977#[target_feature(enable = "avx512f,avx512vl")]
27978#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27979#[cfg_attr(test, assert_instr(vptestnmq))]
27980pub unsafe fn _mm_testn_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
27981 let and: __m128i = _mm_and_si128(a, b);
27982 let zero: __m128i = _mm_setzero_si128();
    _mm_cmpeq_epi64_mask(and, zero)
27984}
27985
27986/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
27987///
27988/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_testn_epi64_mask&expand=5922)
27989#[inline]
27990#[target_feature(enable = "avx512f,avx512vl")]
27991#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27992#[cfg_attr(test, assert_instr(vptestnmq))]
27993pub unsafe fn _mm_mask_testn_epi64_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
27994 let and: __m128i = _mm_and_si128(a, b);
27995 let zero: __m128i = _mm_setzero_si128();
    _mm_mask_cmpeq_epi64_mask(k, and, zero)
27997}
27998
27999/// Store 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
28000///
28001/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_ps&expand=5671)
28002///
28003/// # Safety of non-temporal stores
28004///
28005/// After using this intrinsic, but before any other access to the memory that this intrinsic
28006/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
28007/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
28008/// return.
28009///
28010/// See [`_mm_sfence`] for details.
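///
/// # Examples
///
/// An editorial sketch of the intended usage pattern, a 64-byte-aligned
/// destination followed by `_mm_sfence` (`ignore`d: needs a nightly toolchain
/// with `feature(stdarch_x86_avx512)` and an AVX-512F CPU):
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// // 64-byte alignment is required by `_mm512_stream_ps`.
/// #[repr(align(64))]
/// struct Aligned([f32; 16]);
///
/// if is_x86_feature_detected!("avx512f") {
///     unsafe {
///         let mut out = Aligned([0.0; 16]);
///         _mm512_stream_ps(out.0.as_mut_ptr(), _mm512_set1_ps(1.0));
///         // Fence before the stored memory is read again.
///         _mm_sfence();
///         assert!(out.0.iter().all(|&x| x == 1.0));
///     }
/// }
/// ```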
28011#[inline]
28012#[target_feature(enable = "avx512f")]
28013#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28014#[cfg_attr(test, assert_instr(vmovntps))]
28015#[allow(clippy::cast_ptr_alignment)]
28016pub unsafe fn _mm512_stream_ps(mem_addr: *mut f32, a: __m512) {
    intrinsics::nontemporal_store(mem_addr as *mut __m512, a);
28018}
28019
28020/// Store 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
28021///
28022/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_pd&expand=5667)
28023///
28024/// # Safety of non-temporal stores
28025///
28026/// After using this intrinsic, but before any other access to the memory that this intrinsic
28027/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
28028/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
28029/// return.
28030///
28031/// See [`_mm_sfence`] for details.
28032#[inline]
28033#[target_feature(enable = "avx512f")]
28034#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28035#[cfg_attr(test, assert_instr(vmovntps))] //should be vmovntpd
28036#[allow(clippy::cast_ptr_alignment)]
28037pub unsafe fn _mm512_stream_pd(mem_addr: *mut f64, a: __m512d) {
    intrinsics::nontemporal_store(mem_addr as *mut __m512d, a);
28039}
28040
28041/// Store 512-bits of integer data from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
28042///
28043/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_si512&expand=5675)
28044///
28045/// # Safety of non-temporal stores
28046///
28047/// After using this intrinsic, but before any other access to the memory that this intrinsic
28048/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
28049/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
28050/// return.
28051///
28052/// See [`_mm_sfence`] for details.
28053#[inline]
28054#[target_feature(enable = "avx512f")]
28055#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28056#[cfg_attr(test, assert_instr(vmovntps))] //should be vmovntdq
28057#[allow(clippy::cast_ptr_alignment)]
28058pub unsafe fn _mm512_stream_si512(mem_addr: *mut i64, a: __m512i) {
    intrinsics::nontemporal_store(mem_addr as *mut __m512i, a);
28060}
28061
/// Sets packed single-precision (32-bit) floating-point elements in `dst`
/// with the supplied values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_ps&expand=4931)
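///
/// # Examples
///
/// A sketch of the argument order (`ignore`d: assumes a nightly toolchain with
/// `feature(stdarch_x86_avx512)` and AVX-512F hardware). The first argument is
/// the highest element, so the last argument ends up in element 0:
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     unsafe {
///         let v = _mm512_set_ps(
///             15., 14., 13., 12., 11., 10., 9., 8.,
///             7., 6., 5., 4., 3., 2., 1., 0.,
///         );
///         let mut out = [0.0f32; 16];
///         _mm512_storeu_ps(out.as_mut_ptr(), v);
///         assert_eq!(out[0], 0.0);
///         assert_eq!(out[15], 15.0);
///     }
/// }
/// ```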
28065#[inline]
28066#[target_feature(enable = "avx512f")]
28067#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28068pub unsafe fn _mm512_set_ps(
28069 e0: f32,
28070 e1: f32,
28071 e2: f32,
28072 e3: f32,
28073 e4: f32,
28074 e5: f32,
28075 e6: f32,
28076 e7: f32,
28077 e8: f32,
28078 e9: f32,
28079 e10: f32,
28080 e11: f32,
28081 e12: f32,
28082 e13: f32,
28083 e14: f32,
28084 e15: f32,
28085) -> __m512 {
28086 _mm512_setr_ps(
        e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0,
28088 )
28089}
28090
/// Sets packed single-precision (32-bit) floating-point elements in `dst`
/// with the supplied values in reverse order.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_ps&expand=5008)
28095#[inline]
28096#[target_feature(enable = "avx512f")]
28097#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28098pub unsafe fn _mm512_setr_ps(
28099 e0: f32,
28100 e1: f32,
28101 e2: f32,
28102 e3: f32,
28103 e4: f32,
28104 e5: f32,
28105 e6: f32,
28106 e7: f32,
28107 e8: f32,
28108 e9: f32,
28109 e10: f32,
28110 e11: f32,
28111 e12: f32,
28112 e13: f32,
28113 e14: f32,
28114 e15: f32,
28115) -> __m512 {
    let r: f32x16 = f32x16::new(
        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
    );
    transmute(r)
28120}
28121
28122/// Broadcast 64-bit float `a` to all elements of `dst`.
28123///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_pd&expand=4975)
28125#[inline]
28126#[target_feature(enable = "avx512f")]
28127#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28128pub unsafe fn _mm512_set1_pd(a: f64) -> __m512d {
    transmute(f64x8::splat(a))
28130}
28131
28132/// Broadcast 32-bit float `a` to all elements of `dst`.
28133///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_ps&expand=4981)
28135#[inline]
28136#[target_feature(enable = "avx512f")]
28137#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28138pub unsafe fn _mm512_set1_ps(a: f32) -> __m512 {
    transmute(f32x16::splat(a))
28140}
28141
28142/// Sets packed 32-bit integers in `dst` with the supplied values.
28143///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi32&expand=4908)
28145#[inline]
28146#[target_feature(enable = "avx512f")]
28147#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28148pub unsafe fn _mm512_set_epi32(
28149 e15: i32,
28150 e14: i32,
28151 e13: i32,
28152 e12: i32,
28153 e11: i32,
28154 e10: i32,
28155 e9: i32,
28156 e8: i32,
28157 e7: i32,
28158 e6: i32,
28159 e5: i32,
28160 e4: i32,
28161 e3: i32,
28162 e2: i32,
28163 e1: i32,
28164 e0: i32,
28165) -> __m512i {
28166 _mm512_setr_epi32(
        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
28168 )
28169}
28170
28171/// Broadcast 8-bit integer a to all elements of dst.
28172///
28173/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_epi8&expand=4972)
28174#[inline]
28175#[target_feature(enable = "avx512f")]
28176#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28177pub unsafe fn _mm512_set1_epi8(a: i8) -> __m512i {
    transmute(i8x64::splat(a))
28179}
28180
28181/// Broadcast the low packed 16-bit integer from a to all elements of dst.
28182///
28183/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_epi16&expand=4944)
28184#[inline]
28185#[target_feature(enable = "avx512f")]
28186#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28187pub unsafe fn _mm512_set1_epi16(a: i16) -> __m512i {
    transmute(i16x32::splat(a))
28189}
28190
28191/// Broadcast 32-bit integer `a` to all elements of `dst`.
28192#[inline]
28193#[target_feature(enable = "avx512f")]
28194#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28195pub unsafe fn _mm512_set1_epi32(a: i32) -> __m512i {
    transmute(i32x16::splat(a))
28197}
28198
28199/// Broadcast 32-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28200///
28201/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_set1_epi32&expand=4951)
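///
/// # Examples
///
/// A sketch of the writemask behaviour (`ignore`d: assumes a nightly toolchain
/// with `feature(stdarch_x86_avx512)` and AVX-512F hardware):
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     unsafe {
///         let src = _mm512_set1_epi32(-1);
///         let k: __mmask16 = 0b0000_0000_0000_1111;
///         // Lanes 0..=3 take the broadcast value 7; the rest keep `src`.
///         let r = _mm512_mask_set1_epi32(src, k, 7);
///         assert_eq!(_mm512_cmpeq_epi32_mask(r, _mm512_set1_epi32(7)), k);
///     }
/// }
/// ```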
28202#[inline]
28203#[target_feature(enable = "avx512f")]
28204#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28205#[cfg_attr(test, assert_instr(vpbroadcastd))]
28206pub unsafe fn _mm512_mask_set1_epi32(src: __m512i, k: __mmask16, a: i32) -> __m512i {
28207 let r: i32x16 = _mm512_set1_epi32(a).as_i32x16();
    transmute(simd_select_bitmask(k, r, src.as_i32x16()))
28209}
28210
28211/// Broadcast 32-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28212///
28213/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_set1_epi32&expand=4952)
28214#[inline]
28215#[target_feature(enable = "avx512f")]
28216#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28217#[cfg_attr(test, assert_instr(vpbroadcastd))]
28218pub unsafe fn _mm512_maskz_set1_epi32(k: __mmask16, a: i32) -> __m512i {
28219 let r: i32x16 = _mm512_set1_epi32(a).as_i32x16();
28220 let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, r, zero))
28222}
28223
28224/// Broadcast 32-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28225///
28226/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_set1_epi32&expand=4948)
28227#[inline]
28228#[target_feature(enable = "avx512f,avx512vl")]
28229#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28230#[cfg_attr(test, assert_instr(vpbroadcastd))]
28231pub unsafe fn _mm256_mask_set1_epi32(src: __m256i, k: __mmask8, a: i32) -> __m256i {
28232 let r: i32x8 = _mm256_set1_epi32(a).as_i32x8();
    transmute(simd_select_bitmask(k, r, src.as_i32x8()))
28234}
28235
28236/// Broadcast 32-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28237///
28238/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_set1_epi32&expand=4949)
28239#[inline]
28240#[target_feature(enable = "avx512f,avx512vl")]
28241#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28242#[cfg_attr(test, assert_instr(vpbroadcastd))]
28243pub unsafe fn _mm256_maskz_set1_epi32(k: __mmask8, a: i32) -> __m256i {
28244 let r: i32x8 = _mm256_set1_epi32(a).as_i32x8();
28245 let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
    transmute(simd_select_bitmask(k, r, zero))
28247}
28248
28249/// Broadcast 32-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28250///
28251/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_set1_epi32&expand=4945)
28252#[inline]
28253#[target_feature(enable = "avx512f,avx512vl")]
28254#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28255#[cfg_attr(test, assert_instr(vpbroadcastd))]
28256pub unsafe fn _mm_mask_set1_epi32(src: __m128i, k: __mmask8, a: i32) -> __m128i {
28257 let r: i32x4 = _mm_set1_epi32(a).as_i32x4();
    transmute(simd_select_bitmask(k, r, src.as_i32x4()))
28259}
28260
28261/// Broadcast 32-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28262///
28263/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_set1_epi32&expand=4946)
28264#[inline]
28265#[target_feature(enable = "avx512f,avx512vl")]
28266#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28267#[cfg_attr(test, assert_instr(vpbroadcastd))]
28268pub unsafe fn _mm_maskz_set1_epi32(k: __mmask8, a: i32) -> __m128i {
28269 let r: i32x4 = _mm_set1_epi32(a).as_i32x4();
28270 let zero: i32x4 = _mm_setzero_si128().as_i32x4();
    transmute(simd_select_bitmask(k, r, zero))
28272}
28273
28274/// Broadcast 64-bit integer `a` to all elements of `dst`.
28275///
28276/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_epi64&expand=4961)
28277#[inline]
28278#[target_feature(enable = "avx512f")]
28279#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28280pub unsafe fn _mm512_set1_epi64(a: i64) -> __m512i {
    transmute(i64x8::splat(a))
28282}
28283
28284/// Broadcast 64-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28285///
28286/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_set1_epi64&expand=4959)
28287#[inline]
28288#[target_feature(enable = "avx512f")]
28289#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28290#[cfg_attr(test, assert_instr(vpbroadcastq))]
28291pub unsafe fn _mm512_mask_set1_epi64(src: __m512i, k: __mmask8, a: i64) -> __m512i {
28292 let r: i64x8 = _mm512_set1_epi64(a).as_i64x8();
    transmute(simd_select_bitmask(k, r, src.as_i64x8()))
28294}
28295
28296/// Broadcast 64-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28297///
28298/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_set1_epi64&expand=4960)
28299#[inline]
28300#[target_feature(enable = "avx512f")]
28301#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28302#[cfg_attr(test, assert_instr(vpbroadcastq))]
28303pub unsafe fn _mm512_maskz_set1_epi64(k: __mmask8, a: i64) -> __m512i {
28304 let r: i64x8 = _mm512_set1_epi64(a).as_i64x8();
28305 let zero: i64x8 = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, r, zero))
28307}
28308
28309/// Broadcast 64-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28310///
28311/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_set1_epi64&expand=4957)
28312#[inline]
28313#[target_feature(enable = "avx512f,avx512vl")]
28314#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28315#[cfg_attr(test, assert_instr(vpbroadcastq))]
28316pub unsafe fn _mm256_mask_set1_epi64(src: __m256i, k: __mmask8, a: i64) -> __m256i {
28317 let r: i64x4 = _mm256_set1_epi64x(a).as_i64x4();
    transmute(simd_select_bitmask(k, r, src.as_i64x4()))
28319}
28320
28321/// Broadcast 64-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28322///
28323/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_set1_epi64&expand=4958)
28324#[inline]
28325#[target_feature(enable = "avx512f,avx512vl")]
28326#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28327#[cfg_attr(test, assert_instr(vpbroadcastq))]
28328pub unsafe fn _mm256_maskz_set1_epi64(k: __mmask8, a: i64) -> __m256i {
28329 let r: i64x4 = _mm256_set1_epi64x(a).as_i64x4();
28330 let zero: i64x4 = _mm256_setzero_si256().as_i64x4();
    transmute(simd_select_bitmask(k, r, zero))
28332}
28333
28334/// Broadcast 64-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28335///
28336/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_set1_epi64&expand=4954)
28337#[inline]
28338#[target_feature(enable = "avx512f,avx512vl")]
28339#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28340#[cfg_attr(test, assert_instr(vpbroadcastq))]
28341pub unsafe fn _mm_mask_set1_epi64(src: __m128i, k: __mmask8, a: i64) -> __m128i {
28342 let r: i64x2 = _mm_set1_epi64x(a).as_i64x2();
    transmute(simd_select_bitmask(k, r, src.as_i64x2()))
28344}
28345
28346/// Broadcast 64-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28347///
28348/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_set1_epi64&expand=4955)
28349#[inline]
28350#[target_feature(enable = "avx512f,avx512vl")]
28351#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28352#[cfg_attr(test, assert_instr(vpbroadcastq))]
28353pub unsafe fn _mm_maskz_set1_epi64(k: __mmask8, a: i64) -> __m128i {
28354 let r: i64x2 = _mm_set1_epi64x(a).as_i64x2();
28355 let zero: i64x2 = _mm_setzero_si128().as_i64x2();
    transmute(simd_select_bitmask(k, r, zero))
28357}
28358
28359/// Set packed 64-bit integers in dst with the repeated 4 element sequence.
28360///
28361/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_epi64&expand=4983)
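///
/// # Examples
///
/// A sketch of the repeating pattern (`ignore`d: assumes a nightly toolchain
/// with `feature(stdarch_x86_avx512)` and AVX-512F hardware):
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     unsafe {
///         // Lanes from low to high: 0, 1, 2, 3, 0, 1, 2, 3.
///         let v = _mm512_set4_epi64(3, 2, 1, 0);
///         // Lanes 0 and 4 hold the value 0.
///         assert_eq!(_mm512_cmpeq_epi64_mask(v, _mm512_set1_epi64(0)), 0b0001_0001);
///     }
/// }
/// ```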
28362#[inline]
28363#[target_feature(enable = "avx512f")]
28364#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28365pub unsafe fn _mm512_set4_epi64(d: i64, c: i64, b: i64, a: i64) -> __m512i {
    _mm512_set_epi64(d, c, b, a, d, c, b, a)
28367}
28368
28369/// Set packed 64-bit integers in dst with the repeated 4 element sequence in reverse order.
28370///
28371/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_epi64&expand=5010)
28372#[inline]
28373#[target_feature(enable = "avx512f")]
28374#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28375pub unsafe fn _mm512_setr4_epi64(d: i64, c: i64, b: i64, a: i64) -> __m512i {
    _mm512_set_epi64(a, b, c, d, a, b, c, d)
28377}
28378
28379/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than, and store the results in mask vector k.
28380///
28381/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_ps_mask&expand=1074)
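///
/// # Examples
///
/// A minimal sketch (`ignore`d: assumes a nightly toolchain with
/// `feature(stdarch_x86_avx512)` and AVX-512F hardware):
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     unsafe {
///         let a = _mm512_set1_ps(1.0);
///         let b = _mm512_set1_ps(2.0);
///         assert_eq!(_mm512_cmplt_ps_mask(a, b), 0xFFFF); // 1.0 < 2.0 in every lane
///         assert_eq!(_mm512_cmplt_ps_mask(b, a), 0);
///     }
/// }
/// ```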
28382#[inline]
28383#[target_feature(enable = "avx512f")]
28384#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28385#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
28386pub unsafe fn _mm512_cmplt_ps_mask(a: __m512, b: __m512) -> __mmask16 {
28387 _mm512_cmp_ps_mask::<_CMP_LT_OS>(a, b)
28388}
28389
28390/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
28391///
28392/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_ps_mask&expand=1075)
28393#[inline]
28394#[target_feature(enable = "avx512f")]
28395#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28396#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
28397pub unsafe fn _mm512_mask_cmplt_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
28398 _mm512_mask_cmp_ps_mask::<_CMP_LT_OS>(k1, a, b)
28399}
28400
28401/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k.
28402///
28403/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnlt_ps_mask&expand=1154)
28404#[inline]
28405#[target_feature(enable = "avx512f")]
28406#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28407#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
28408pub unsafe fn _mm512_cmpnlt_ps_mask(a: __m512, b: __m512) -> __mmask16 {
28409 _mm512_cmp_ps_mask::<_CMP_NLT_US>(a, b)
28410}
28411
28412/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
28413///
28414/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnlt_ps_mask&expand=1155)
28415#[inline]
28416#[target_feature(enable = "avx512f")]
28417#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28418#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
28419pub unsafe fn _mm512_mask_cmpnlt_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
28420 _mm512_mask_cmp_ps_mask::<_CMP_NLT_US>(k1, a, b)
28421}
28422
28423/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k.
28424///
28425/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_ps_mask&expand=1013)
28426#[inline]
28427#[target_feature(enable = "avx512f")]
28428#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28429#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
28430pub unsafe fn _mm512_cmple_ps_mask(a: __m512, b: __m512) -> __mmask16 {
28431 _mm512_cmp_ps_mask::<_CMP_LE_OS>(a, b)
28432}
28433
28434/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
28435///
28436/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_ps_mask&expand=1014)
28437#[inline]
28438#[target_feature(enable = "avx512f")]
28439#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28440#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
28441pub unsafe fn _mm512_mask_cmple_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
28442 _mm512_mask_cmp_ps_mask::<_CMP_LE_OS>(k1, a, b)
28443}
28444
28445/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k.
28446///
28447/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnle_ps_mask&expand=1146)
28448#[inline]
28449#[target_feature(enable = "avx512f")]
28450#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28451#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
28452pub unsafe fn _mm512_cmpnle_ps_mask(a: __m512, b: __m512) -> __mmask16 {
28453 _mm512_cmp_ps_mask::<_CMP_NLE_US>(a, b)
28454}
28455
28456/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
28457///
28458/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnle_ps_mask&expand=1147)
28459#[inline]
28460#[target_feature(enable = "avx512f")]
28461#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28462#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
28463pub unsafe fn _mm512_mask_cmpnle_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
28464 _mm512_mask_cmp_ps_mask::<_CMP_NLE_US>(k1, a, b)
28465}
28466
28467/// Compare packed single-precision (32-bit) floating-point elements in a and b for equality, and store the results in mask vector k.
28468///
28469/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_ps_mask&expand=828)
28470#[inline]
28471#[target_feature(enable = "avx512f")]
28472#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28473#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
28474pub unsafe fn _mm512_cmpeq_ps_mask(a: __m512, b: __m512) -> __mmask16 {
28475 _mm512_cmp_ps_mask::<_CMP_EQ_OQ>(a, b)
28476}
28477
28478/// Compare packed single-precision (32-bit) floating-point elements in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
28479///
28480/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_ps_mask&expand=829)
28481#[inline]
28482#[target_feature(enable = "avx512f")]
28483#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28484#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
28485pub unsafe fn _mm512_mask_cmpeq_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
28486 _mm512_mask_cmp_ps_mask::<_CMP_EQ_OQ>(k1, a, b)
28487}
28488
28489/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k.
28490///
28491/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_ps_mask&expand=1130)
28492#[inline]
28493#[target_feature(enable = "avx512f")]
28494#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28495#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
28496pub unsafe fn _mm512_cmpneq_ps_mask(a: __m512, b: __m512) -> __mmask16 {
28497 _mm512_cmp_ps_mask::<_CMP_NEQ_UQ>(a, b)
28498}
28499
28500/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
28501///
28502/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_ps_mask&expand=1131)
28503#[inline]
28504#[target_feature(enable = "avx512f")]
28505#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28506#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
28507pub unsafe fn _mm512_mask_cmpneq_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
28508 _mm512_mask_cmp_ps_mask::<_CMP_NEQ_UQ>(k1, a, b)
28509}
28510
28511/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
28512///
28513/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_ps_mask&expand=749)
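///
/// # Examples
///
/// A sketch using the `_CMP_LT_OS` predicate, which makes this equivalent to
/// `_mm512_cmplt_ps_mask` (`ignore`d: assumes a nightly toolchain with
/// `feature(stdarch_x86_avx512)` and AVX-512F hardware):
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     unsafe {
///         let a = _mm512_setr_ps(
///             0., 1., 2., 3., 4., 5., 6., 7.,
///             8., 9., 10., 11., 12., 13., 14., 15.,
///         );
///         let b = _mm512_set1_ps(8.0);
///         // Only lanes 0..=7 are strictly below 8.0.
///         assert_eq!(_mm512_cmp_ps_mask::<_CMP_LT_OS>(a, b), 0x00FF);
///     }
/// }
/// ```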
28514#[inline]
28515#[target_feature(enable = "avx512f")]
28516#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28517#[rustc_legacy_const_generics(2)]
28518#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
28519pub unsafe fn _mm512_cmp_ps_mask<const IMM8: i32>(a: __m512, b: __m512) -> __mmask16 {
28520 static_assert_uimm_bits!(IMM8, 5);
28521 let neg_one: i16 = -1;
28522 let a: f32x16 = a.as_f32x16();
28523 let b: f32x16 = b.as_f32x16();
    let r: i16 = vcmpps(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION);
    transmute(r)
28526}
28527
28528/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
28529///
28530/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_ps_mask&expand=750)
28531#[inline]
28532#[target_feature(enable = "avx512f")]
28533#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28534#[rustc_legacy_const_generics(3)]
28535#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
28536pub unsafe fn _mm512_mask_cmp_ps_mask<const IMM8: i32>(
28537 k1: __mmask16,
28538 a: __m512,
28539 b: __m512,
28540) -> __mmask16 {
28541 static_assert_uimm_bits!(IMM8, 5);
28542 let a: f32x16 = a.as_f32x16();
28543 let b: f32x16 = b.as_f32x16();
    let r: i16 = vcmpps(a, b, IMM8, k1 as i16, _MM_FROUND_CUR_DIRECTION);
    transmute(r)
28546}
28547
28548/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
28549///
28550/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_ps_mask&expand=747)
28551#[inline]
28552#[target_feature(enable = "avx512f,avx512vl")]
28553#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28554#[rustc_legacy_const_generics(2)]
28555#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
28556pub unsafe fn _mm256_cmp_ps_mask<const IMM8: i32>(a: __m256, b: __m256) -> __mmask8 {
28557 static_assert_uimm_bits!(IMM8, 5);
28558 let neg_one: i8 = -1;
28559 let a: f32x8 = a.as_f32x8();
28560 let b: f32x8 = b.as_f32x8();
    let r: i8 = vcmpps256(a, b, IMM8, neg_one);
    transmute(r)
28563}
28564
28565/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
28566///
28567/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_ps_mask&expand=748)
28568#[inline]
28569#[target_feature(enable = "avx512f,avx512vl")]
28570#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28571#[rustc_legacy_const_generics(3)]
28572#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
28573pub unsafe fn _mm256_mask_cmp_ps_mask<const IMM8: i32>(
28574 k1: __mmask8,
28575 a: __m256,
28576 b: __m256,
28577) -> __mmask8 {
28578 static_assert_uimm_bits!(IMM8, 5);
28579 let a: f32x8 = a.as_f32x8();
28580 let b: f32x8 = b.as_f32x8();
    let r: i8 = vcmpps256(a, b, IMM8, k1 as i8);
    transmute(r)
28583}
28584
28585/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
28586///
28587/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_ps_mask&expand=745)
28588#[inline]
28589#[target_feature(enable = "avx512f,avx512vl")]
28590#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28591#[rustc_legacy_const_generics(2)]
28592#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
28593pub unsafe fn _mm_cmp_ps_mask<const IMM8: i32>(a: __m128, b: __m128) -> __mmask8 {
28594 static_assert_uimm_bits!(IMM8, 5);
28595 let neg_one: i8 = -1;
28596 let a: f32x4 = a.as_f32x4();
28597 let b: f32x4 = b.as_f32x4();
    let r: i8 = vcmpps128(a, b, IMM8, neg_one);
    transmute(r)
28600}
28601
28602/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
28603///
28604/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_ps_mask&expand=746)
28605#[inline]
28606#[target_feature(enable = "avx512f,avx512vl")]
28607#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28608#[rustc_legacy_const_generics(3)]
28609#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
28610pub unsafe fn _mm_mask_cmp_ps_mask<const IMM8: i32>(
28611 k1: __mmask8,
28612 a: __m128,
28613 b: __m128,
28614) -> __mmask8 {
28615 static_assert_uimm_bits!(IMM8, 5);
28616 let a: f32x4 = a.as_f32x4();
28617 let b: f32x4 = b.as_f32x4();
    let r: i8 = vcmpps128(a, b, IMM8, k1 as i8);
    transmute(r)
28620}
28621
28622/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.\
28623/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
28624///
28625/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_round_ps_mask&expand=753)
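///
/// # Examples
///
/// A sketch passing `_MM_FROUND_NO_EXC` to suppress exception reporting; the
/// resulting mask matches `_mm512_cmp_ps_mask` (`ignore`d: assumes a nightly
/// toolchain with `feature(stdarch_x86_avx512)` and AVX-512F hardware):
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     unsafe {
///         let a = _mm512_set1_ps(1.0);
///         let b = _mm512_set1_ps(2.0);
///         let k = _mm512_cmp_round_ps_mask::<_CMP_LE_OS, _MM_FROUND_NO_EXC>(a, b);
///         assert_eq!(k, 0xFFFF);
///     }
/// }
/// ```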
28626#[inline]
28627#[target_feature(enable = "avx512f")]
28628#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28629#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
28630#[rustc_legacy_const_generics(2, 3)]
28631pub unsafe fn _mm512_cmp_round_ps_mask<const IMM5: i32, const SAE: i32>(
28632 a: __m512,
28633 b: __m512,
28634) -> __mmask16 {
28635 static_assert_uimm_bits!(IMM5, 5);
28636 static_assert_mantissas_sae!(SAE);
28637 let neg_one: i16 = -1;
28638 let a: f32x16 = a.as_f32x16();
28639 let b: f32x16 = b.as_f32x16();
    let r: i16 = vcmpps(a, b, IMM5, neg_one, SAE);
    transmute(r)
28642}
28643
28644/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).\
28645/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
28646///
28647/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_round_ps_mask&expand=754)
28648#[inline]
28649#[target_feature(enable = "avx512f")]
28650#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28651#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
28652#[rustc_legacy_const_generics(3, 4)]
28653pub unsafe fn _mm512_mask_cmp_round_ps_mask<const IMM5: i32, const SAE: i32>(
28654 m: __mmask16,
28655 a: __m512,
28656 b: __m512,
28657) -> __mmask16 {
28658 static_assert_uimm_bits!(IMM5, 5);
28659 static_assert_mantissas_sae!(SAE);
28660 let a: f32x16 = a.as_f32x16();
28661 let b: f32x16 = b.as_f32x16();
28662 let r: i16 = vcmpps(a, b, IMM5, m as i16, SAE);
    transmute(r)
28664}
28665
28666/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k.
28667///
28668/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpord_ps_mask&expand=1162)
28669#[inline]
28670#[target_feature(enable = "avx512f")]
28671#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
28673pub unsafe fn _mm512_cmpord_ps_mask(a: __m512, b: __m512) -> __mmask16 {
28674 _mm512_cmp_ps_mask::<_CMP_ORD_Q>(a, b)
28675}
28676
28677/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
28678///
28679/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpord_ps_mask&expand=1163)
28680#[inline]
28681#[target_feature(enable = "avx512f")]
28682#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28683#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
28684pub unsafe fn _mm512_mask_cmpord_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
28685 _mm512_mask_cmp_ps_mask::<_CMP_ORD_Q>(k1, a, b)
28686}
28687
28688/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k.
28689///
28690/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpunord_ps_mask&expand=1170)
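///
/// # Examples
///
/// A sketch contrasting the unordered and ordered checks on NaN input
/// (`ignore`d: assumes a nightly toolchain with `feature(stdarch_x86_avx512)`
/// and AVX-512F hardware):
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     unsafe {
///         let a = _mm512_set1_ps(f32::NAN);
///         let b = _mm512_set1_ps(1.0);
///         // Every pair contains a NaN, so the unordered mask is all ones...
///         assert_eq!(_mm512_cmpunord_ps_mask(a, b), 0xFFFF);
///         // ...and the ordered mask is empty.
///         assert_eq!(_mm512_cmpord_ps_mask(a, b), 0);
///     }
/// }
/// ```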
28691#[inline]
28692#[target_feature(enable = "avx512f")]
28693#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28694#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
28695pub unsafe fn _mm512_cmpunord_ps_mask(a: __m512, b: __m512) -> __mmask16 {
28696 _mm512_cmp_ps_mask::<_CMP_UNORD_Q>(a, b)
28697}
28698
28699/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
28700///
28701/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpunord_ps_mask&expand=1171)
28702#[inline]
28703#[target_feature(enable = "avx512f")]
28704#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28705#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
28706pub unsafe fn _mm512_mask_cmpunord_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
28707 _mm512_mask_cmp_ps_mask::<_CMP_UNORD_Q>(k1, a, b)
28708}
28709
28710/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than, and store the results in mask vector k.
28711///
28712/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_pd_mask&expand=1071)
28713#[inline]
28714#[target_feature(enable = "avx512f")]
28715#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28716#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
28717pub unsafe fn _mm512_cmplt_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
28718 _mm512_cmp_pd_mask::<_CMP_LT_OS>(a, b)
28719}
28720
28721/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
28722///
28723/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_pd_mask&expand=1072)
28724#[inline]
28725#[target_feature(enable = "avx512f")]
28726#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28727#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
28728pub unsafe fn _mm512_mask_cmplt_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
28729 _mm512_mask_cmp_pd_mask::<_CMP_LT_OS>(k1, a, b)
28730}
28731
28732/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k.
28733///
28734/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnlt_pd_mask&expand=1151)
28735#[inline]
28736#[target_feature(enable = "avx512f")]
28737#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28738#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
28739pub unsafe fn _mm512_cmpnlt_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
28740 _mm512_cmp_pd_mask::<_CMP_NLT_US>(a, b)
28741}
28742
28743/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
28744///
28745/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnlt_pd_mask&expand=1152)
28746#[inline]
28747#[target_feature(enable = "avx512f")]
28748#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28749#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
28750pub unsafe fn _mm512_mask_cmpnlt_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
    _mm512_mask_cmp_pd_mask::<_CMP_NLT_US>(m, a, b)
28752}
28753
28754/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k.
28755///
28756/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_pd_mask&expand=1010)
28757#[inline]
28758#[target_feature(enable = "avx512f")]
28759#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28760#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
28761pub unsafe fn _mm512_cmple_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
28762 _mm512_cmp_pd_mask::<_CMP_LE_OS>(a, b)
28763}
28764
28765/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
28766///
28767/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_pd_mask&expand=1011)
28768#[inline]
28769#[target_feature(enable = "avx512f")]
28770#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28771#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
28772pub unsafe fn _mm512_mask_cmple_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
28773 _mm512_mask_cmp_pd_mask::<_CMP_LE_OS>(k1, a, b)
28774}
28775
28776/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k.
28777///
28778/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnle_pd_mask&expand=1143)
28779#[inline]
28780#[target_feature(enable = "avx512f")]
28781#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28782#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
28783pub unsafe fn _mm512_cmpnle_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
28784 _mm512_cmp_pd_mask::<_CMP_NLE_US>(a, b)
28785}
28786
28787/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
28788///
28789/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnle_pd_mask&expand=1144)
28790#[inline]
28791#[target_feature(enable = "avx512f")]
28792#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28793#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
28794pub unsafe fn _mm512_mask_cmpnle_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
28795 _mm512_mask_cmp_pd_mask::<_CMP_NLE_US>(k1, a, b)
28796}
28797
28798/// Compare packed double-precision (64-bit) floating-point elements in a and b for equality, and store the results in mask vector k.
28799///
28800/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_pd_mask&expand=822)
28801#[inline]
28802#[target_feature(enable = "avx512f")]
28803#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28804#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
28805pub unsafe fn _mm512_cmpeq_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
28806 _mm512_cmp_pd_mask::<_CMP_EQ_OQ>(a, b)
28807}
28808
28809/// Compare packed double-precision (64-bit) floating-point elements in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
28810///
28811/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_pd_mask&expand=823)
28812#[inline]
28813#[target_feature(enable = "avx512f")]
28814#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28815#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
28816pub unsafe fn _mm512_mask_cmpeq_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
28817 _mm512_mask_cmp_pd_mask::<_CMP_EQ_OQ>(k1, a, b)
28818}
28819
28820/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k.
28821///
28822/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_pd_mask&expand=1127)
28823#[inline]
28824#[target_feature(enable = "avx512f")]
28825#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28826#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
28827pub unsafe fn _mm512_cmpneq_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
28828 _mm512_cmp_pd_mask::<_CMP_NEQ_UQ>(a, b)
28829}
28830
28831/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
28832///
28833/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_pd_mask&expand=1128)
28834#[inline]
28835#[target_feature(enable = "avx512f")]
28836#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28837#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
28838pub unsafe fn _mm512_mask_cmpneq_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
28839 _mm512_mask_cmp_pd_mask::<_CMP_NEQ_UQ>(k1, a, b)
28840}
28841
28842/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
28843///
28844/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_pd_mask&expand=741)
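///
/// A minimal usage sketch (illustrative only, not from Intel's documentation; it assumes
/// the unstable `stdarch_x86_avx512` feature and AVX-512F support are available):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // Bit `i` of the returned mask is set when `a[i] > b[i]` (ordered, non-signaling).
/// unsafe fn lanes_greater(a: __m512d, b: __m512d) -> __mmask8 {
///     _mm512_cmp_pd_mask::<_CMP_GT_OQ>(a, b)
/// }
/// ```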
28845#[inline]
28846#[target_feature(enable = "avx512f")]
28847#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28848#[rustc_legacy_const_generics(2)]
28849#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
28850pub unsafe fn _mm512_cmp_pd_mask<const IMM8: i32>(a: __m512d, b: __m512d) -> __mmask8 {
28851 static_assert_uimm_bits!(IMM8, 5);
28852 let neg_one: i8 = -1;
28853 let a: f64x8 = a.as_f64x8();
28854 let b: f64x8 = b.as_f64x8();
    let r: i8 = vcmppd(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION);
    transmute(r)
28857}
28858
28859/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
28860///
28861/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_pd_mask&expand=742)
28862#[inline]
28863#[target_feature(enable = "avx512f")]
28864#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28865#[rustc_legacy_const_generics(3)]
28866#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
28867pub unsafe fn _mm512_mask_cmp_pd_mask<const IMM8: i32>(
28868 k1: __mmask8,
28869 a: __m512d,
28870 b: __m512d,
28871) -> __mmask8 {
28872 static_assert_uimm_bits!(IMM8, 5);
28873 let a: f64x8 = a.as_f64x8();
28874 let b: f64x8 = b.as_f64x8();
    let r: i8 = vcmppd(a, b, IMM8, k1 as i8, _MM_FROUND_CUR_DIRECTION);
    transmute(r)
28877}
28878
28879/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
28880///
28881/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_pd_mask&expand=739)
28882#[inline]
28883#[target_feature(enable = "avx512f,avx512vl")]
28884#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28885#[rustc_legacy_const_generics(2)]
28886#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
28887pub unsafe fn _mm256_cmp_pd_mask<const IMM8: i32>(a: __m256d, b: __m256d) -> __mmask8 {
28888 static_assert_uimm_bits!(IMM8, 5);
28889 let neg_one: i8 = -1;
28890 let a: f64x4 = a.as_f64x4();
28891 let b: f64x4 = b.as_f64x4();
    let r: i8 = vcmppd256(a, b, IMM8, neg_one);
    transmute(r)
28894}
28895
28896/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
28897///
28898/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_pd_mask&expand=740)
28899#[inline]
28900#[target_feature(enable = "avx512f,avx512vl")]
28901#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28902#[rustc_legacy_const_generics(3)]
28903#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
28904pub unsafe fn _mm256_mask_cmp_pd_mask<const IMM8: i32>(
28905 k1: __mmask8,
28906 a: __m256d,
28907 b: __m256d,
28908) -> __mmask8 {
28909 static_assert_uimm_bits!(IMM8, 5);
28910 let a: f64x4 = a.as_f64x4();
28911 let b: f64x4 = b.as_f64x4();
    let r: i8 = vcmppd256(a, b, IMM8, k1 as i8);
    transmute(r)
28914}
28915
28916/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
28917///
28918/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_pd_mask&expand=737)
28919#[inline]
28920#[target_feature(enable = "avx512f,avx512vl")]
28921#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28922#[rustc_legacy_const_generics(2)]
28923#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
28924pub unsafe fn _mm_cmp_pd_mask<const IMM8: i32>(a: __m128d, b: __m128d) -> __mmask8 {
28925 static_assert_uimm_bits!(IMM8, 5);
28926 let neg_one: i8 = -1;
28927 let a: f64x2 = a.as_f64x2();
28928 let b: f64x2 = b.as_f64x2();
    let r: i8 = vcmppd128(a, b, IMM8, neg_one);
    transmute(r)
28931}
28932
28933/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
28934///
28935/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_pd_mask&expand=738)
28936#[inline]
28937#[target_feature(enable = "avx512f,avx512vl")]
28938#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28939#[rustc_legacy_const_generics(3)]
28940#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
28941pub unsafe fn _mm_mask_cmp_pd_mask<const IMM8: i32>(
28942 k1: __mmask8,
28943 a: __m128d,
28944 b: __m128d,
28945) -> __mmask8 {
28946 static_assert_uimm_bits!(IMM8, 5);
28947 let a: f64x2 = a.as_f64x2();
28948 let b: f64x2 = b.as_f64x2();
    let r: i8 = vcmppd128(a, b, IMM8, k1 as i8);
    transmute(r)
28951}
28952
28953/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.\
28954/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
28955///
28956/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_round_pd_mask&expand=751)
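///
/// A minimal usage sketch (illustrative only, not from Intel's documentation; it assumes
/// the unstable `stdarch_x86_avx512` feature and AVX-512F support are available):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // Same predicate as `_mm512_cmp_pd_mask::<_CMP_LT_OQ>`, but floating-point
/// // exceptions are suppressed by passing `_MM_FROUND_NO_EXC` as the SAE parameter.
/// unsafe fn lanes_less_quiet(a: __m512d, b: __m512d) -> __mmask8 {
///     _mm512_cmp_round_pd_mask::<_CMP_LT_OQ, _MM_FROUND_NO_EXC>(a, b)
/// }
/// ```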
28957#[inline]
28958#[target_feature(enable = "avx512f")]
28959#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28960#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
28961#[rustc_legacy_const_generics(2, 3)]
28962pub unsafe fn _mm512_cmp_round_pd_mask<const IMM5: i32, const SAE: i32>(
28963 a: __m512d,
28964 b: __m512d,
28965) -> __mmask8 {
28966 static_assert_uimm_bits!(IMM5, 5);
28967 static_assert_mantissas_sae!(SAE);
28968 let neg_one: i8 = -1;
28969 let a: f64x8 = a.as_f64x8();
28970 let b: f64x8 = b.as_f64x8();
    let r: i8 = vcmppd(a, b, IMM5, neg_one, SAE);
    transmute(r)
28973}
28974
28975/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).\
28976/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
28977///
28978/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_round_pd_mask&expand=752)
28979#[inline]
28980#[target_feature(enable = "avx512f")]
28981#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28982#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
28983#[rustc_legacy_const_generics(3, 4)]
28984pub unsafe fn _mm512_mask_cmp_round_pd_mask<const IMM5: i32, const SAE: i32>(
28985 k1: __mmask8,
28986 a: __m512d,
28987 b: __m512d,
28988) -> __mmask8 {
28989 static_assert_uimm_bits!(IMM5, 5);
28990 static_assert_mantissas_sae!(SAE);
28991 let a: f64x8 = a.as_f64x8();
28992 let b: f64x8 = b.as_f64x8();
    let r: i8 = vcmppd(a, b, IMM5, k1 as i8, SAE);
    transmute(r)
28995}
28996
28997/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k.
28998///
28999/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpord_pd_mask&expand=1159)
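///
/// A minimal sketch of how the ordered compare can be used to find NaN-free lanes
/// (illustrative only; assumes the unstable `stdarch_x86_avx512` feature and AVX-512F
/// support):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // A set bit marks a lane in which neither `a` nor `b` is NaN.
/// unsafe fn non_nan_lanes(a: __m512d, b: __m512d) -> __mmask8 {
///     _mm512_cmpord_pd_mask(a, b)
/// }
/// ```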
29000#[inline]
29001#[target_feature(enable = "avx512f")]
29002#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29003#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
29004pub unsafe fn _mm512_cmpord_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
29005 _mm512_cmp_pd_mask::<_CMP_ORD_Q>(a, b)
29006}
29007
29008/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
29009///
29010/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpord_pd_mask&expand=1160)
29011#[inline]
29012#[target_feature(enable = "avx512f")]
29013#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29014#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
29015pub unsafe fn _mm512_mask_cmpord_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
29016 _mm512_mask_cmp_pd_mask::<_CMP_ORD_Q>(k1, a, b)
29017}
29018
29019/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k.
29020///
29021/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpunord_pd_mask&expand=1167)
29022#[inline]
29023#[target_feature(enable = "avx512f")]
29024#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29025#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
29026pub unsafe fn _mm512_cmpunord_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
29027 _mm512_cmp_pd_mask::<_CMP_UNORD_Q>(a, b)
29028}
29029
29030/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
29031///
29032/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpunord_pd_mask&expand=1168)
29033#[inline]
29034#[target_feature(enable = "avx512f")]
29035#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29036#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
29037pub unsafe fn _mm512_mask_cmpunord_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
29038 _mm512_mask_cmp_pd_mask::<_CMP_UNORD_Q>(k1, a, b)
29039}
29040
29041/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.
29042///
29043/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_ss_mask&expand=763)
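///
/// A minimal usage sketch (illustrative only; assumes the unstable `stdarch_x86_avx512`
/// feature and AVX-512F support). Only bit 0 of the returned mask is meaningful:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // True when the lowest lane of `a` is less than the lowest lane of `b`.
/// unsafe fn lower_is_less(a: __m128, b: __m128) -> bool {
///     (_mm_cmp_ss_mask::<_CMP_LT_OS>(a, b) & 1) != 0
/// }
/// ```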
29044#[inline]
29045#[target_feature(enable = "avx512f")]
29046#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29047#[rustc_legacy_const_generics(2)]
29048#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
29049pub unsafe fn _mm_cmp_ss_mask<const IMM8: i32>(a: __m128, b: __m128) -> __mmask8 {
29050 static_assert_uimm_bits!(IMM8, 5);
29051 let neg_one: i8 = -1;
    let r: i8 = vcmpss(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION);
    transmute(r)
29054}
29055
29056/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).
29057///
29058/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_ss_mask&expand=764)
29059#[inline]
29060#[target_feature(enable = "avx512f")]
29061#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29062#[rustc_legacy_const_generics(3)]
29063#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
29064pub unsafe fn _mm_mask_cmp_ss_mask<const IMM8: i32>(
29065 k1: __mmask8,
29066 a: __m128,
29067 b: __m128,
29068) -> __mmask8 {
29069 static_assert_uimm_bits!(IMM8, 5);
    let r: i8 = vcmpss(a, b, IMM8, k1 as i8, _MM_FROUND_CUR_DIRECTION);
    transmute(r)
29072}
29073
29074/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.\
29075/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
29076///
29077/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_round_ss_mask&expand=757)
29078#[inline]
29079#[target_feature(enable = "avx512f")]
29080#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29081#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
29082#[rustc_legacy_const_generics(2, 3)]
29083pub unsafe fn _mm_cmp_round_ss_mask<const IMM5: i32, const SAE: i32>(
29084 a: __m128,
29085 b: __m128,
29086) -> __mmask8 {
29087 static_assert_uimm_bits!(IMM5, 5);
29088 static_assert_mantissas_sae!(SAE);
29089 let neg_one: i8 = -1;
    let r: i8 = vcmpss(a, b, IMM5, neg_one, SAE);
    transmute(r)
29092}
29093
/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).\
29095/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
29096///
29097/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_round_ss_mask&expand=758)
29098#[inline]
29099#[target_feature(enable = "avx512f")]
29100#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29101#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
29102#[rustc_legacy_const_generics(3, 4)]
29103pub unsafe fn _mm_mask_cmp_round_ss_mask<const IMM5: i32, const SAE: i32>(
29104 k1: __mmask8,
29105 a: __m128,
29106 b: __m128,
29107) -> __mmask8 {
29108 static_assert_uimm_bits!(IMM5, 5);
29109 static_assert_mantissas_sae!(SAE);
    let r: i8 = vcmpss(a, b, IMM5, k1 as i8, SAE);
    transmute(r)
29112}
29113
29114/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.
29115///
29116/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_sd_mask&expand=760)
29117#[inline]
29118#[target_feature(enable = "avx512f")]
29119#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29120#[rustc_legacy_const_generics(2)]
29121#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
29122pub unsafe fn _mm_cmp_sd_mask<const IMM8: i32>(a: __m128d, b: __m128d) -> __mmask8 {
29123 static_assert_uimm_bits!(IMM8, 5);
29124 let neg_one: i8 = -1;
    let r: i8 = vcmpsd(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION);
    transmute(r)
29127}
29128
29129/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).
29130///
29131/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_sd_mask&expand=761)
29132#[inline]
29133#[target_feature(enable = "avx512f")]
29134#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29135#[rustc_legacy_const_generics(3)]
29136#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
29137pub unsafe fn _mm_mask_cmp_sd_mask<const IMM8: i32>(
29138 k1: __mmask8,
29139 a: __m128d,
29140 b: __m128d,
29141) -> __mmask8 {
29142 static_assert_uimm_bits!(IMM8, 5);
    let r: i8 = vcmpsd(a, b, IMM8, k1 as i8, _MM_FROUND_CUR_DIRECTION);
    transmute(r)
29145}
29146
29147/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.\
29148/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
29149///
29150/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_round_sd_mask&expand=755)
29151#[inline]
29152#[target_feature(enable = "avx512f")]
29153#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29154#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
29155#[rustc_legacy_const_generics(2, 3)]
29156pub unsafe fn _mm_cmp_round_sd_mask<const IMM5: i32, const SAE: i32>(
29157 a: __m128d,
29158 b: __m128d,
29159) -> __mmask8 {
29160 static_assert_uimm_bits!(IMM5, 5);
29161 static_assert_mantissas_sae!(SAE);
29162 let neg_one: i8 = -1;
    let r: i8 = vcmpsd(a, b, IMM5, neg_one, SAE);
    transmute(r)
29165}
29166
29167/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).\
29168/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
29169///
29170/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_round_sd_mask&expand=756)
29171#[inline]
29172#[target_feature(enable = "avx512f")]
29173#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29174#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
29175#[rustc_legacy_const_generics(3, 4)]
29176pub unsafe fn _mm_mask_cmp_round_sd_mask<const IMM5: i32, const SAE: i32>(
29177 k1: __mmask8,
29178 a: __m128d,
29179 b: __m128d,
29180) -> __mmask8 {
29181 static_assert_uimm_bits!(IMM5, 5);
29182 static_assert_mantissas_sae!(SAE);
    let r: i8 = vcmpsd(a, b, IMM5, k1 as i8, SAE);
    transmute(r)
29185}
29186
29187/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k.
29188///
29189/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epu32_mask&expand=1056)
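///
/// A minimal sketch highlighting the unsigned interpretation of the lanes (illustrative
/// only; assumes the unstable `stdarch_x86_avx512` feature and AVX-512F support):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe fn example() -> __mmask16 {
///     let a = _mm512_set1_epi32(-1); // every lane is u32::MAX when viewed as unsigned
///     let b = _mm512_set1_epi32(1);
///     // u32::MAX < 1 is false in every lane, so the returned mask is 0.
///     _mm512_cmplt_epu32_mask(a, b)
/// }
/// ```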
29190#[inline]
29191#[target_feature(enable = "avx512f")]
29192#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29193#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29194pub unsafe fn _mm512_cmplt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    simd_bitmask::<u32x16, _>(simd_lt(a.as_u32x16(), b.as_u32x16()))
29196}
29197
/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
29199///
29200/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epu32_mask&expand=1057)
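///
/// A minimal sketch of the zeromask behaviour (illustrative only; assumes the unstable
/// `stdarch_x86_avx512` feature and AVX-512F support):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe fn masked_less_than(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
///     // Lanes whose bit is clear in `k1` are forced to 0 in the result, so this is
///     // equivalent to `_mm512_cmplt_epu32_mask(a, b) & k1`.
///     _mm512_mask_cmplt_epu32_mask(k1, a, b)
/// }
/// ```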
29201#[inline]
29202#[target_feature(enable = "avx512f")]
29203#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29204#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29205pub unsafe fn _mm512_mask_cmplt_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
29206 _mm512_cmplt_epu32_mask(a, b) & k1
29207}
29208
29209/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k.
29210///
29211/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epu32_mask&expand=1054)
29212#[inline]
29213#[target_feature(enable = "avx512f,avx512vl")]
29214#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29215#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29216pub unsafe fn _mm256_cmplt_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    simd_bitmask::<u32x8, _>(simd_lt(a.as_u32x8(), b.as_u32x8()))
29218}
29219
/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
29221///
29222/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epu32_mask&expand=1055)
29223#[inline]
29224#[target_feature(enable = "avx512f,avx512vl")]
29225#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29226#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29227pub unsafe fn _mm256_mask_cmplt_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
29228 _mm256_cmplt_epu32_mask(a, b) & k1
29229}
29230
29231/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k.
29232///
29233/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epu32_mask&expand=1052)
29234#[inline]
29235#[target_feature(enable = "avx512f,avx512vl")]
29236#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29237#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29238pub unsafe fn _mm_cmplt_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    simd_bitmask::<u32x4, _>(simd_lt(a.as_u32x4(), b.as_u32x4()))
29240}
29241
/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
29243///
29244/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epu32_mask&expand=1053)
29245#[inline]
29246#[target_feature(enable = "avx512f,avx512vl")]
29247#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29248#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29249pub unsafe fn _mm_mask_cmplt_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
29250 _mm_cmplt_epu32_mask(a, b) & k1
29251}
29252
29253/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k.
29254///
29255/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epu32_mask&expand=933)
29256#[inline]
29257#[target_feature(enable = "avx512f")]
29258#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29259#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29260pub unsafe fn _mm512_cmpgt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    simd_bitmask::<u32x16, _>(simd_gt(a.as_u32x16(), b.as_u32x16()))
29262}
29263
29264/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
29265///
29266/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epu32_mask&expand=934)
29267#[inline]
29268#[target_feature(enable = "avx512f")]
29269#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29270#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29271pub unsafe fn _mm512_mask_cmpgt_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
29272 _mm512_cmpgt_epu32_mask(a, b) & k1
29273}
29274
29275/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k.
29276///
29277/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epu32_mask&expand=931)
29278#[inline]
29279#[target_feature(enable = "avx512f,avx512vl")]
29280#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29281#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29282pub unsafe fn _mm256_cmpgt_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    simd_bitmask::<u32x8, _>(simd_gt(a.as_u32x8(), b.as_u32x8()))
29284}
29285
29286/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
29287///
29288/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epu32_mask&expand=932)
29289#[inline]
29290#[target_feature(enable = "avx512f,avx512vl")]
29291#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29292#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29293pub unsafe fn _mm256_mask_cmpgt_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
29294 _mm256_cmpgt_epu32_mask(a, b) & k1
29295}
29296
29297/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k.
29298///
29299/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epu32_mask&expand=929)
29300#[inline]
29301#[target_feature(enable = "avx512f,avx512vl")]
29302#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29303#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29304pub unsafe fn _mm_cmpgt_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    simd_bitmask::<u32x4, _>(simd_gt(a.as_u32x4(), b.as_u32x4()))
29306}
29307
29308/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
29309///
29310/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epu32_mask&expand=930)
29311#[inline]
29312#[target_feature(enable = "avx512f,avx512vl")]
29313#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29314#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29315pub unsafe fn _mm_mask_cmpgt_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
29316 _mm_cmpgt_epu32_mask(a, b) & k1
29317}
29318
29319/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
29320///
29321/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epu32_mask&expand=995)
29322#[inline]
29323#[target_feature(enable = "avx512f")]
29324#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29325#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29326pub unsafe fn _mm512_cmple_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    simd_bitmask::<u32x16, _>(simd_le(a.as_u32x16(), b.as_u32x16()))
29328}
29329
/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
29331///
29332/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epu32_mask&expand=996)
29333#[inline]
29334#[target_feature(enable = "avx512f")]
29335#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29336#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29337pub unsafe fn _mm512_mask_cmple_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
29338 _mm512_cmple_epu32_mask(a, b) & k1
29339}
29340
29341/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
29342///
29343/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epu32_mask&expand=993)
29344#[inline]
29345#[target_feature(enable = "avx512f,avx512vl")]
29346#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29347#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29348pub unsafe fn _mm256_cmple_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    simd_bitmask::<u32x8, _>(simd_le(a.as_u32x8(), b.as_u32x8()))
29350}
29351
/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
29353///
29354/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epu32_mask&expand=994)
29355#[inline]
29356#[target_feature(enable = "avx512f,avx512vl")]
29357#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29358#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29359pub unsafe fn _mm256_mask_cmple_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
29360 _mm256_cmple_epu32_mask(a, b) & k1
29361}
29362
29363/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
29364///
29365/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epu32_mask&expand=991)
29366#[inline]
29367#[target_feature(enable = "avx512f,avx512vl")]
29368#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29369#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29370pub unsafe fn _mm_cmple_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    simd_bitmask::<u32x4, _>(simd_le(a.as_u32x4(), b.as_u32x4()))
29372}
29373
/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
29375///
29376/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epu32_mask&expand=992)
29377#[inline]
29378#[target_feature(enable = "avx512f,avx512vl")]
29379#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29380#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29381pub unsafe fn _mm_mask_cmple_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
29382 _mm_cmple_epu32_mask(a, b) & k1
29383}
29384
29385/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
29386///
29387/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epu32_mask&expand=873)
29388#[inline]
29389#[target_feature(enable = "avx512f")]
29390#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29391#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29392pub unsafe fn _mm512_cmpge_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    simd_bitmask::<u32x16, _>(simd_ge(a.as_u32x16(), b.as_u32x16()))
29394}
29395
29396/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
29397///
29398/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epu32_mask&expand=874)
29399#[inline]
29400#[target_feature(enable = "avx512f")]
29401#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29402#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29403pub unsafe fn _mm512_mask_cmpge_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
29404 _mm512_cmpge_epu32_mask(a, b) & k1
29405}
29406
29407/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
29408///
29409/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epu32_mask&expand=871)
29410#[inline]
29411#[target_feature(enable = "avx512f,avx512vl")]
29412#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29413#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29414pub unsafe fn _mm256_cmpge_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    simd_bitmask::<u32x8, _>(simd_ge(a.as_u32x8(), b.as_u32x8()))
29416}
29417
29418/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
29419///
29420/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epu32_mask&expand=872)
29421#[inline]
29422#[target_feature(enable = "avx512f,avx512vl")]
29423#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29424#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29425pub unsafe fn _mm256_mask_cmpge_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
29426 _mm256_cmpge_epu32_mask(a, b) & k1
29427}
29428
29429/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
29430///
29431/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epu32_mask&expand=869)
29432#[inline]
29433#[target_feature(enable = "avx512f,avx512vl")]
29434#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29435#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29436pub unsafe fn _mm_cmpge_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    simd_bitmask::<u32x4, _>(simd_ge(a.as_u32x4(), b.as_u32x4()))
29438}
29439
29440/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
29441///
29442/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epu32_mask&expand=870)
29443#[inline]
29444#[target_feature(enable = "avx512f,avx512vl")]
29445#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29446#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29447pub unsafe fn _mm_mask_cmpge_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
29448 _mm_cmpge_epu32_mask(a, b) & k1
29449}
29450
29451/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k.
29452///
29453/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epu32_mask&expand=807)
29454#[inline]
29455#[target_feature(enable = "avx512f")]
29456#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29457#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29458pub unsafe fn _mm512_cmpeq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    simd_bitmask::<u32x16, _>(simd_eq(a.as_u32x16(), b.as_u32x16()))
29460}
29461
29462/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
29463///
29464/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epu32_mask&expand=808)
29465#[inline]
29466#[target_feature(enable = "avx512f")]
29467#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29468#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29469pub unsafe fn _mm512_mask_cmpeq_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
29470 _mm512_cmpeq_epu32_mask(a, b) & k1
29471}
29472
29473/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k.
29474///
29475/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epu32_mask&expand=805)
29476#[inline]
29477#[target_feature(enable = "avx512f,avx512vl")]
29478#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29479#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29480pub unsafe fn _mm256_cmpeq_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    simd_bitmask::<u32x8, _>(simd_eq(a.as_u32x8(), b.as_u32x8()))
29482}
29483
29484/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
29485///
29486/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epu32_mask&expand=806)
29487#[inline]
29488#[target_feature(enable = "avx512f,avx512vl")]
29489#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29490#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29491pub unsafe fn _mm256_mask_cmpeq_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
29492 _mm256_cmpeq_epu32_mask(a, b) & k1
29493}
29494
29495/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k.
29496///
29497/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epu32_mask&expand=803)
29498#[inline]
29499#[target_feature(enable = "avx512f,avx512vl")]
29500#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29501#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29502pub unsafe fn _mm_cmpeq_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    simd_bitmask::<u32x4, _>(simd_eq(a.as_u32x4(), b.as_u32x4()))
29504}
29505
29506/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
29507///
29508/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epu32_mask&expand=804)
29509#[inline]
29510#[target_feature(enable = "avx512f,avx512vl")]
29511#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29512#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29513pub unsafe fn _mm_mask_cmpeq_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
29514 _mm_cmpeq_epu32_mask(a, b) & k1
29515}
29516
29517/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k.
29518///
29519/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epu32_mask&expand=1112)
29520#[inline]
29521#[target_feature(enable = "avx512f")]
29522#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29523#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29524pub unsafe fn _mm512_cmpneq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    simd_bitmask::<u32x16, _>(simd_ne(a.as_u32x16(), b.as_u32x16()))
29526}
29527
29528/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
29529///
29530/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epu32_mask&expand=1113)
29531#[inline]
29532#[target_feature(enable = "avx512f")]
29533#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29534#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29535pub unsafe fn _mm512_mask_cmpneq_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
29536 _mm512_cmpneq_epu32_mask(a, b) & k1
29537}
29538
29539/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k.
29540///
29541/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epu32_mask&expand=1110)
29542#[inline]
29543#[target_feature(enable = "avx512f,avx512vl")]
29544#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29545#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29546pub unsafe fn _mm256_cmpneq_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    simd_bitmask::<u32x8, _>(simd_ne(a.as_u32x8(), b.as_u32x8()))
29548}
29549
29550/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
29551///
29552/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epu32_mask&expand=1111)
29553#[inline]
29554#[target_feature(enable = "avx512f,avx512vl")]
29555#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29556#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29557pub unsafe fn _mm256_mask_cmpneq_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
29558 _mm256_cmpneq_epu32_mask(a, b) & k1
29559}
29560
29561/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k.
29562///
29563/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epu32_mask&expand=1108)
29564#[inline]
29565#[target_feature(enable = "avx512f,avx512vl")]
29566#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29567#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29568pub unsafe fn _mm_cmpneq_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    simd_bitmask::<u32x4, _>(simd_ne(a.as_u32x4(), b.as_u32x4()))
29570}
29571
29572/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
29573///
29574/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epu32_mask&expand=1109)
29575#[inline]
29576#[target_feature(enable = "avx512f,avx512vl")]
29577#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29578#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29579pub unsafe fn _mm_mask_cmpneq_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
29580 _mm_cmpneq_epu32_mask(a, b) & k1
29581}
29582
29583/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
29584///
29585/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epu32_mask&expand=721)
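///
/// A minimal usage sketch (illustrative only; assumes the unstable `stdarch_x86_avx512`
/// feature and AVX-512F support). The predicate is one of the `_MM_CMPINT_*` constants:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // `_MM_CMPINT_LE` selects an unsigned "less than or equal" comparison.
/// unsafe fn lanes_le(a: __m512i, b: __m512i) -> __mmask16 {
///     _mm512_cmp_epu32_mask::<_MM_CMPINT_LE>(a, b)
/// }
/// ```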
29586#[inline]
29587#[target_feature(enable = "avx512f")]
29588#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29589#[rustc_legacy_const_generics(2)]
29590#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
29591pub unsafe fn _mm512_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
29592 a: __m512i,
29593 b: __m512i,
29594) -> __mmask16 {
29595 static_assert_uimm_bits!(IMM3, 3);
29596 let neg_one: i16 = -1;
29597 let a: i32x16 = a.as_i32x16();
29598 let b: i32x16 = b.as_i32x16();
    let r: i16 = vpcmpud(a, b, IMM3, neg_one);
    transmute(r)
29601}
29602
29603/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
29604///
29605/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epu32_mask&expand=722)
29606#[inline]
29607#[target_feature(enable = "avx512f")]
29608#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29609#[rustc_legacy_const_generics(3)]
29610#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
29611pub unsafe fn _mm512_mask_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
29612 k1: __mmask16,
29613 a: __m512i,
29614 b: __m512i,
29615) -> __mmask16 {
29616 static_assert_uimm_bits!(IMM3, 3);
29617 let a: i32x16 = a.as_i32x16();
29618 let b: i32x16 = b.as_i32x16();
    let r: i16 = vpcmpud(a, b, IMM3, k1 as i16);
    transmute(r)
29621}
29622
29623/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
29624///
29625/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epu32_mask&expand=719)
29626#[inline]
29627#[target_feature(enable = "avx512f,avx512vl")]
29628#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29629#[rustc_legacy_const_generics(2)]
29630#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
29631pub unsafe fn _mm256_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
29632 a: __m256i,
29633 b: __m256i,
29634) -> __mmask8 {
29635 static_assert_uimm_bits!(IMM3, 3);
29636 let neg_one: i8 = -1;
29637 let a: i32x8 = a.as_i32x8();
29638 let b: i32x8 = b.as_i32x8();
    let r: i8 = vpcmpud256(a, b, IMM3, neg_one);
    transmute(r)
29641}
29642
29643/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
29644///
29645/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epu32_mask&expand=720)
29646#[inline]
29647#[target_feature(enable = "avx512f,avx512vl")]
29648#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29649#[rustc_legacy_const_generics(3)]
29650#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
29651pub unsafe fn _mm256_mask_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
29652 k1: __mmask8,
29653 a: __m256i,
29654 b: __m256i,
29655) -> __mmask8 {
29656 static_assert_uimm_bits!(IMM3, 3);
29657 let a: i32x8 = a.as_i32x8();
29658 let b: i32x8 = b.as_i32x8();
    let r: i8 = vpcmpud256(a, b, IMM3, k1 as i8);
    transmute(r)
29661}
29662
29663/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
29664///
29665/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epu32_mask&expand=717)
29666#[inline]
29667#[target_feature(enable = "avx512f,avx512vl")]
29668#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29669#[rustc_legacy_const_generics(2)]
29670#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
29671pub unsafe fn _mm_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
29672 static_assert_uimm_bits!(IMM3, 3);
29673 let neg_one: i8 = -1;
29674 let a: i32x4 = a.as_i32x4();
29675 let b: i32x4 = b.as_i32x4();
    let r: i8 = vpcmpud128(a, b, IMM3, neg_one);
    transmute(r)
29678}
29679
29680/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
29681///
29682/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epu32_mask&expand=718)
29683#[inline]
29684#[target_feature(enable = "avx512f,avx512vl")]
29685#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29686#[rustc_legacy_const_generics(3)]
29687#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
29688pub unsafe fn _mm_mask_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
29689 k1: __mmask8,
29690 a: __m128i,
29691 b: __m128i,
29692) -> __mmask8 {
29693 static_assert_uimm_bits!(IMM3, 3);
29694 let a: i32x4 = a.as_i32x4();
29695 let b: i32x4 = b.as_i32x4();
    let r: i8 = vpcmpud128(a, b, IMM3, k1 as i8);
    transmute(r)
29698}
29699
29700/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k.
29701///
29702/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epi32_mask&expand=1029)
29703#[inline]
29704#[target_feature(enable = "avx512f")]
29705#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29706#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
29707pub unsafe fn _mm512_cmplt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    simd_bitmask::<i32x16, _>(simd_lt(a.as_i32x16(), b.as_i32x16()))
29709}
29710
/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
29712///
29713/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epi32_mask&expand=1031)
29714#[inline]
29715#[target_feature(enable = "avx512f")]
29716#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29717#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
29718pub unsafe fn _mm512_mask_cmplt_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
29719 _mm512_cmplt_epi32_mask(a, b) & k1
29720}
29721
29722/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k.
29723///
29724/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epi32_mask&expand=1027)
29725#[inline]
29726#[target_feature(enable = "avx512f,avx512vl")]
29727#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29728#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
29729pub unsafe fn _mm256_cmplt_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    simd_bitmask::<i32x8, _>(simd_lt(a.as_i32x8(), b.as_i32x8()))
29731}
29732
/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
29734///
29735/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epi32_mask&expand=1028)
29736#[inline]
29737#[target_feature(enable = "avx512f,avx512vl")]
29738#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29739#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
29740pub unsafe fn _mm256_mask_cmplt_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
29741 _mm256_cmplt_epi32_mask(a, b) & k1
29742}
29743
29744/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k.
29745///
29746/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi32_mask&expand=1025)
29747#[inline]
29748#[target_feature(enable = "avx512f,avx512vl")]
29749#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29750#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
29751pub unsafe fn _mm_cmplt_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    simd_bitmask::<i32x4, _>(simd_lt(a.as_i32x4(), b.as_i32x4()))
29753}
29754
/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
29756///
29757/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epi32_mask&expand=1026)
29758#[inline]
29759#[target_feature(enable = "avx512f,avx512vl")]
29760#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29761#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
29762pub unsafe fn _mm_mask_cmplt_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
29763 _mm_cmplt_epi32_mask(a, b) & k1
29764}
29765
29766/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k.
29767///
29768/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epi32_mask&expand=905)
29769#[inline]
29770#[target_feature(enable = "avx512f")]
29771#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29772#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
29773pub unsafe fn _mm512_cmpgt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    simd_bitmask::<i32x16, _>(simd_gt(a.as_i32x16(), b.as_i32x16()))
29775}
29776
29777/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
29778///
29779/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epi32_mask&expand=906)
29780#[inline]
29781#[target_feature(enable = "avx512f")]
29782#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29783#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
29784pub unsafe fn _mm512_mask_cmpgt_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
29785 _mm512_cmpgt_epi32_mask(a, b) & k1
29786}
29787
29788/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k.
29789///
29790/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi32_mask&expand=903)
29791#[inline]
29792#[target_feature(enable = "avx512f,avx512vl")]
29793#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29794#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
29795pub unsafe fn _mm256_cmpgt_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    simd_bitmask::<i32x8, _>(simd_gt(a.as_i32x8(), b.as_i32x8()))
29797}
29798
29799/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
29800///
29801/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epi32_mask&expand=904)
29802#[inline]
29803#[target_feature(enable = "avx512f,avx512vl")]
29804#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29805#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
29806pub unsafe fn _mm256_mask_cmpgt_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
29807 _mm256_cmpgt_epi32_mask(a, b) & k1
29808}
29809
29810/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k.
29811///
29812/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi32_mask&expand=901)
29813#[inline]
29814#[target_feature(enable = "avx512f,avx512vl")]
29815#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29816#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
29817pub unsafe fn _mm_cmpgt_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    simd_bitmask::<i32x4, _>(simd_gt(a.as_i32x4(), b.as_i32x4()))
29819}
29820
29821/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
29822///
29823/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epi32_mask&expand=902)
29824#[inline]
29825#[target_feature(enable = "avx512f,avx512vl")]
29826#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29827#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
29828pub unsafe fn _mm_mask_cmpgt_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
29829 _mm_cmpgt_epi32_mask(a, b) & k1
29830}
29831
29832/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
29833///
29834/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epi32_mask&expand=971)
29835#[inline]
29836#[target_feature(enable = "avx512f")]
29837#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29838#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
29839pub unsafe fn _mm512_cmple_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    simd_bitmask::<i32x16, _>(simd_le(a.as_i32x16(), b.as_i32x16()))
29841}
29842
/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
29844///
29845/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epi32_mask&expand=972)
29846#[inline]
29847#[target_feature(enable = "avx512f")]
29848#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29849#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
29850pub unsafe fn _mm512_mask_cmple_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
29851 _mm512_cmple_epi32_mask(a, b) & k1
29852}
29853
29854/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
29855///
29856/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epi32_mask&expand=969)
29857#[inline]
29858#[target_feature(enable = "avx512f,avx512vl")]
29859#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29860#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
29861pub unsafe fn _mm256_cmple_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    simd_bitmask::<i32x8, _>(simd_le(a.as_i32x8(), b.as_i32x8()))
29863}
29864
/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
29866///
29867/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epi32_mask&expand=970)
29868#[inline]
29869#[target_feature(enable = "avx512f,avx512vl")]
29870#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29871#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
29872pub unsafe fn _mm256_mask_cmple_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
29873 _mm256_cmple_epi32_mask(a, b) & k1
29874}
29875
29876/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
29877///
29878/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epi32_mask&expand=967)
29879#[inline]
29880#[target_feature(enable = "avx512f,avx512vl")]
29881#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29882#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
29883pub unsafe fn _mm_cmple_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    simd_bitmask::<i32x4, _>(simd_le(a.as_i32x4(), b.as_i32x4()))
29885}
29886
/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
29888///
29889/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epi32_mask&expand=968)
29890#[inline]
29891#[target_feature(enable = "avx512f,avx512vl")]
29892#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29893#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
29894pub unsafe fn _mm_mask_cmple_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
29895 _mm_cmple_epi32_mask(a, b) & k1
29896}
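
// Illustrative sketch (not part of the public API): a caller can test whether
// every 32-bit lane of `a` is <= the matching lane of `b` by checking that the
// comparison above sets all 16 mask bits. Assumes the usual AVX-512 safety
// contract for calling `target_feature` functions.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_all_lanes_le_epi32(a: __m512i, b: __m512i) -> bool {
    // Each of the 16 mask bits corresponds to one i32 lane of the 512-bit vectors.
    _mm512_cmple_epi32_mask(a, b) == 0xFFFF
}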
29897
29898/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
29899///
29900/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epi32_mask&expand=849)
29901#[inline]
29902#[target_feature(enable = "avx512f")]
29903#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29904#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
29905pub unsafe fn _mm512_cmpge_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    simd_bitmask::<i32x16, _>(simd_ge(a.as_i32x16(), b.as_i32x16()))
29907}
29908
29909/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
29910///
29911/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epi32_mask&expand=850)
29912#[inline]
29913#[target_feature(enable = "avx512f")]
29914#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29915#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
29916pub unsafe fn _mm512_mask_cmpge_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
29917 _mm512_cmpge_epi32_mask(a, b) & k1
29918}
29919
29920/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
29921///
29922/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epi32_mask&expand=847)
29923#[inline]
29924#[target_feature(enable = "avx512f,avx512vl")]
29925#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29926#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
29927pub unsafe fn _mm256_cmpge_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    simd_bitmask::<i32x8, _>(simd_ge(a.as_i32x8(), b.as_i32x8()))
29929}
29930
29931/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
29932///
29933/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epi32_mask&expand=848)
29934#[inline]
29935#[target_feature(enable = "avx512f,avx512vl")]
29936#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29937#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
29938pub unsafe fn _mm256_mask_cmpge_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
29939 _mm256_cmpge_epi32_mask(a, b) & k1
29940}
29941
29942/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
29943///
29944/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epi32_mask&expand=845)
29945#[inline]
29946#[target_feature(enable = "avx512f,avx512vl")]
29947#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29948#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
29949pub unsafe fn _mm_cmpge_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    simd_bitmask::<i32x4, _>(simd_ge(a.as_i32x4(), b.as_i32x4()))
29951}
29952
29953/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
29954///
29955/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epi32_mask&expand=846)
29956#[inline]
29957#[target_feature(enable = "avx512f,avx512vl")]
29958#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29959#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
29960pub unsafe fn _mm_mask_cmpge_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
29961 _mm_cmpge_epi32_mask(a, b) & k1
29962}
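
// Illustrative sketch (not part of the public API): greater-than-or-equal is
// less-than-or-equal with the operands swapped, so the two masks below should
// always agree.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_ge_is_swapped_le_epi32(a: __m512i, b: __m512i) -> bool {
    _mm512_cmpge_epi32_mask(a, b) == _mm512_cmple_epi32_mask(b, a)
}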
29963
29964/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k.
29965///
29966/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epi32_mask&expand=779)
29967#[inline]
29968#[target_feature(enable = "avx512f")]
29969#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29970#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
29971pub unsafe fn _mm512_cmpeq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    simd_bitmask::<i32x16, _>(simd_eq(a.as_i32x16(), b.as_i32x16()))
29973}
29974
29975/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
29976///
29977/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epi32_mask&expand=780)
29978#[inline]
29979#[target_feature(enable = "avx512f")]
29980#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29981#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
29982pub unsafe fn _mm512_mask_cmpeq_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
29983 _mm512_cmpeq_epi32_mask(a, b) & k1
29984}
29985
29986/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k.
29987///
29988/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi32_mask&expand=777)
29989#[inline]
29990#[target_feature(enable = "avx512f,avx512vl")]
29991#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29992#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
29993pub unsafe fn _mm256_cmpeq_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    simd_bitmask::<i32x8, _>(simd_eq(a.as_i32x8(), b.as_i32x8()))
29995}
29996
29997/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
29998///
29999/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epi32_mask&expand=778)
30000#[inline]
30001#[target_feature(enable = "avx512f,avx512vl")]
30002#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30003#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
30004pub unsafe fn _mm256_mask_cmpeq_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
30005 _mm256_cmpeq_epi32_mask(a, b) & k1
30006}
30007
30008/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k.
30009///
30010/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi32_mask&expand=775)
30011#[inline]
30012#[target_feature(enable = "avx512f,avx512vl")]
30013#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30014#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
30015pub unsafe fn _mm_cmpeq_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    simd_bitmask::<i32x4, _>(simd_eq(a.as_i32x4(), b.as_i32x4()))
30017}
30018
30019/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30020///
30021/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epi32_mask&expand=776)
30022#[inline]
30023#[target_feature(enable = "avx512f,avx512vl")]
30024#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30025#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
30026pub unsafe fn _mm_mask_cmpeq_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
30027 _mm_cmpeq_epi32_mask(a, b) & k1
30028}
30029
30030/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k.
30031///
30032/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epi32_mask&expand=1088)
30033#[inline]
30034#[target_feature(enable = "avx512f")]
30035#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30036#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
30037pub unsafe fn _mm512_cmpneq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    simd_bitmask::<i32x16, _>(simd_ne(a.as_i32x16(), b.as_i32x16()))
30039}
30040
30041/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30042///
30043/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epi32_mask&expand=1089)
30044#[inline]
30045#[target_feature(enable = "avx512f")]
30046#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30047#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
30048pub unsafe fn _mm512_mask_cmpneq_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
30049 _mm512_cmpneq_epi32_mask(a, b) & k1
30050}
30051
30052/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k.
30053///
30054/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epi32_mask&expand=1086)
30055#[inline]
30056#[target_feature(enable = "avx512f,avx512vl")]
30057#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30058#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
30059pub unsafe fn _mm256_cmpneq_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    simd_bitmask::<i32x8, _>(simd_ne(a.as_i32x8(), b.as_i32x8()))
30061}
30062
30063/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30064///
30065/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epi32_mask&expand=1087)
30066#[inline]
30067#[target_feature(enable = "avx512f,avx512vl")]
30068#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30069#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
30070pub unsafe fn _mm256_mask_cmpneq_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
30071 _mm256_cmpneq_epi32_mask(a, b) & k1
30072}
30073
30074/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k.
30075///
30076/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epi32_mask&expand=1084)
30077#[inline]
30078#[target_feature(enable = "avx512f,avx512vl")]
30079#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30080#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
30081pub unsafe fn _mm_cmpneq_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    simd_bitmask::<i32x4, _>(simd_ne(a.as_i32x4(), b.as_i32x4()))
30083}
30084
30085/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30086///
30087/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epi32_mask&expand=1085)
30088#[inline]
30089#[target_feature(enable = "avx512f,avx512vl")]
30090#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30091#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
30092pub unsafe fn _mm_mask_cmpneq_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
30093 _mm_cmpneq_epi32_mask(a, b) & k1
30094}
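
// Illustrative sketch (not part of the public API): counting how many 32-bit
// lanes differ between two vectors by popcounting the not-equal mask.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_count_mismatched_lanes_epi32(a: __m512i, b: __m512i) -> u32 {
    // `__mmask16` is a plain `u16`, so integer bit tricks such as `count_ones`
    // apply directly to the comparison result.
    _mm512_cmpneq_epi32_mask(a, b).count_ones()
}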
30095
30096/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
30097///
30098/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epi32_mask&expand=697)
30099#[inline]
30100#[target_feature(enable = "avx512f")]
30101#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30102#[rustc_legacy_const_generics(2)]
30103#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
30104pub unsafe fn _mm512_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
30105 a: __m512i,
30106 b: __m512i,
30107) -> __mmask16 {
30108 static_assert_uimm_bits!(IMM3, 3);
30109 let neg_one: i16 = -1;
30110 let a: i32x16 = a.as_i32x16();
30111 let b: i32x16 = b.as_i32x16();
    let r: i16 = vpcmpd(a, b, IMM3, neg_one);
    transmute(r)
30114}
30115
30116/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30117///
30118/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epi32_mask&expand=698)
30119#[inline]
30120#[target_feature(enable = "avx512f")]
30121#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30122#[rustc_legacy_const_generics(3)]
30123#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
30124pub unsafe fn _mm512_mask_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
30125 k1: __mmask16,
30126 a: __m512i,
30127 b: __m512i,
30128) -> __mmask16 {
30129 static_assert_uimm_bits!(IMM3, 3);
30130 let a: i32x16 = a.as_i32x16();
30131 let b: i32x16 = b.as_i32x16();
    let r: i16 = vpcmpd(a, b, IMM3, k1 as i16);
    transmute(r)
30134}
30135
30136/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
30137///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epi32_mask&expand=695)
30139#[inline]
30140#[target_feature(enable = "avx512f,avx512vl")]
30141#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30142#[rustc_legacy_const_generics(2)]
30143#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
30144pub unsafe fn _mm256_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
30145 a: __m256i,
30146 b: __m256i,
30147) -> __mmask8 {
30148 static_assert_uimm_bits!(IMM3, 3);
30149 let neg_one: i8 = -1;
30150 let a: i32x8 = a.as_i32x8();
30151 let b: i32x8 = b.as_i32x8();
    let r: i8 = vpcmpd256(a, b, IMM3, neg_one);
    transmute(r)
30154}
30155
30156/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30157///
30158/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epi32_mask&expand=696)
30159#[inline]
30160#[target_feature(enable = "avx512f,avx512vl")]
30161#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30162#[rustc_legacy_const_generics(3)]
30163#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
30164pub unsafe fn _mm256_mask_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
30165 k1: __mmask8,
30166 a: __m256i,
30167 b: __m256i,
30168) -> __mmask8 {
30169 static_assert_uimm_bits!(IMM3, 3);
30170 let a: i32x8 = a.as_i32x8();
30171 let b: i32x8 = b.as_i32x8();
    let r: i8 = vpcmpd256(a, b, IMM3, k1 as i8);
    transmute(r)
30174}
30175
30176/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
30177///
30178/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epi32_mask&expand=693)
30179#[inline]
30180#[target_feature(enable = "avx512f,avx512vl")]
30181#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30182#[rustc_legacy_const_generics(2)]
30183#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
30184pub unsafe fn _mm_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
30185 static_assert_uimm_bits!(IMM3, 3);
30186 let neg_one: i8 = -1;
30187 let a: i32x4 = a.as_i32x4();
30188 let b: i32x4 = b.as_i32x4();
    let r: i8 = vpcmpd128(a, b, IMM3, neg_one);
    transmute(r)
30191}
30192
30193/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30194///
30195/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epi32_mask&expand=694)
30196#[inline]
30197#[target_feature(enable = "avx512f,avx512vl")]
30198#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30199#[rustc_legacy_const_generics(3)]
30200#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
30201pub unsafe fn _mm_mask_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
30202 k1: __mmask8,
30203 a: __m128i,
30204 b: __m128i,
30205) -> __mmask8 {
30206 static_assert_uimm_bits!(IMM3, 3);
30207 let a: i32x4 = a.as_i32x4();
30208 let b: i32x4 = b.as_i32x4();
    let r: i8 = vpcmpd128(a, b, IMM3, k1 as i8);
    transmute(r)
30211}
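
// Illustrative sketch (not part of the public API): the IMM3-parameterised
// compare generalises the fixed-predicate intrinsics above, so (assuming the
// `_MM_CMPINT_LE` constant defined elsewhere in this crate) the two calls
// below should produce identical masks.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_cmp_imm_matches_cmple_epi32(a: __m512i, b: __m512i) -> bool {
    _mm512_cmp_epi32_mask::<_MM_CMPINT_LE>(a, b) == _mm512_cmple_epi32_mask(a, b)
}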
30212
30213/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k.
30214///
30215/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epu64_mask&expand=1062)
30216#[inline]
30217#[target_feature(enable = "avx512f")]
30218#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30219#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30220pub unsafe fn _mm512_cmplt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    simd_bitmask::<__m512i, _>(simd_lt(a.as_u64x8(), b.as_u64x8()))
30222}
30223
30224/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30225///
30226/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epu64_mask&expand=1063)
30227#[inline]
30228#[target_feature(enable = "avx512f")]
30229#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30230#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30231pub unsafe fn _mm512_mask_cmplt_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
30232 _mm512_cmplt_epu64_mask(a, b) & k1
30233}
30234
30235/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k.
30236///
30237/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epu64_mask&expand=1060)
30238#[inline]
30239#[target_feature(enable = "avx512f,avx512vl")]
30240#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30241#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30242pub unsafe fn _mm256_cmplt_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    simd_bitmask::<__m256i, _>(simd_lt(a.as_u64x4(), b.as_u64x4()))
30244}
30245
30246/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30247///
30248/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epu64_mask&expand=1061)
30249#[inline]
30250#[target_feature(enable = "avx512f,avx512vl")]
30251#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30252#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30253pub unsafe fn _mm256_mask_cmplt_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
30254 _mm256_cmplt_epu64_mask(a, b) & k1
30255}
30256
30257/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k.
30258///
30259/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epu64_mask&expand=1058)
30260#[inline]
30261#[target_feature(enable = "avx512f,avx512vl")]
30262#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30263#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30264pub unsafe fn _mm_cmplt_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    simd_bitmask::<__m128i, _>(simd_lt(a.as_u64x2(), b.as_u64x2()))
30266}
30267
30268/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30269///
30270/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epu64_mask&expand=1059)
30271#[inline]
30272#[target_feature(enable = "avx512f,avx512vl")]
30273#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30274#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30275pub unsafe fn _mm_mask_cmplt_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
30276 _mm_cmplt_epu64_mask(a, b) & k1
30277}
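
// Illustrative sketch (not part of the public API): because these intrinsics
// interpret the lanes as *unsigned* 64-bit integers, a lane holding -1 (all
// bits set) compares as u64::MAX rather than as a negative value.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_count_lanes_below_epu64(a: __m512i, limit: __m512i) -> u32 {
    // One mask bit per u64 lane; popcounting gives the number of lanes < limit.
    _mm512_cmplt_epu64_mask(a, limit).count_ones()
}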
30278
30279/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k.
30280///
30281/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epu64_mask&expand=939)
30282#[inline]
30283#[target_feature(enable = "avx512f")]
30284#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30285#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30286pub unsafe fn _mm512_cmpgt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    simd_bitmask::<__m512i, _>(simd_gt(a.as_u64x8(), b.as_u64x8()))
30288}
30289
30290/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30291///
30292/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epu64_mask&expand=940)
30293#[inline]
30294#[target_feature(enable = "avx512f")]
30295#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30296#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30297pub unsafe fn _mm512_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
30298 _mm512_cmpgt_epu64_mask(a, b) & k1
30299}
30300
30301/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k.
30302///
30303/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epu64_mask&expand=937)
30304#[inline]
30305#[target_feature(enable = "avx512f,avx512vl")]
30306#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30307#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30308pub unsafe fn _mm256_cmpgt_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    simd_bitmask::<__m256i, _>(simd_gt(a.as_u64x4(), b.as_u64x4()))
30310}
30311
30312/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30313///
30314/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epu64_mask&expand=938)
30315#[inline]
30316#[target_feature(enable = "avx512f,avx512vl")]
30317#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30318#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30319pub unsafe fn _mm256_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
30320 _mm256_cmpgt_epu64_mask(a, b) & k1
30321}
30322
30323/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k.
30324///
30325/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epu64_mask&expand=935)
30326#[inline]
30327#[target_feature(enable = "avx512f,avx512vl")]
30328#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30329#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30330pub unsafe fn _mm_cmpgt_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    simd_bitmask::<__m128i, _>(simd_gt(a.as_u64x2(), b.as_u64x2()))
30332}
30333
30334/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30335///
30336/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epu64_mask&expand=936)
30337#[inline]
30338#[target_feature(enable = "avx512f,avx512vl")]
30339#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30340#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30341pub unsafe fn _mm_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
30342 _mm_cmpgt_epu64_mask(a, b) & k1
30343}
30344
30345/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
30346///
30347/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epu64_mask&expand=1001)
30348#[inline]
30349#[target_feature(enable = "avx512f")]
30350#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30351#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30352pub unsafe fn _mm512_cmple_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    simd_bitmask::<__m512i, _>(simd_le(a.as_u64x8(), b.as_u64x8()))
30354}
30355
30356/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30357///
30358/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epu64_mask&expand=1002)
30359#[inline]
30360#[target_feature(enable = "avx512f")]
30361#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30362#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30363pub unsafe fn _mm512_mask_cmple_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
30364 _mm512_cmple_epu64_mask(a, b) & k1
30365}
30366
30367/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
30368///
30369/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epu64_mask&expand=999)
30370#[inline]
30371#[target_feature(enable = "avx512f,avx512vl")]
30372#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30373#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30374pub unsafe fn _mm256_cmple_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    simd_bitmask::<__m256i, _>(simd_le(a.as_u64x4(), b.as_u64x4()))
30376}
30377
30378/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30379///
30380/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epu64_mask&expand=1000)
30381#[inline]
30382#[target_feature(enable = "avx512f,avx512vl")]
30383#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30384#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30385pub unsafe fn _mm256_mask_cmple_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
30386 _mm256_cmple_epu64_mask(a, b) & k1
30387}
30388
30389/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
30390///
30391/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epu64_mask&expand=997)
30392#[inline]
30393#[target_feature(enable = "avx512f,avx512vl")]
30394#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30395#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30396pub unsafe fn _mm_cmple_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    simd_bitmask::<__m128i, _>(simd_le(a.as_u64x2(), b.as_u64x2()))
30398}
30399
30400/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30401///
30402/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epu64_mask&expand=998)
30403#[inline]
30404#[target_feature(enable = "avx512f,avx512vl")]
30405#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30406#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30407pub unsafe fn _mm_mask_cmple_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
30408 _mm_cmple_epu64_mask(a, b) & k1
30409}
30410
30411/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
30412///
30413/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epu64_mask&expand=879)
30414#[inline]
30415#[target_feature(enable = "avx512f")]
30416#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30417#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30418pub unsafe fn _mm512_cmpge_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    simd_bitmask::<__m512i, _>(simd_ge(a.as_u64x8(), b.as_u64x8()))
30420}
30421
30422/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30423///
30424/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epu64_mask&expand=880)
30425#[inline]
30426#[target_feature(enable = "avx512f")]
30427#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30428#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30429pub unsafe fn _mm512_mask_cmpge_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
30430 _mm512_cmpge_epu64_mask(a, b) & k1
30431}
30432
30433/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
30434///
30435/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epu64_mask&expand=877)
30436#[inline]
30437#[target_feature(enable = "avx512f,avx512vl")]
30438#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30439#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30440pub unsafe fn _mm256_cmpge_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    simd_bitmask::<__m256i, _>(simd_ge(a.as_u64x4(), b.as_u64x4()))
30442}
30443
30444/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30445///
30446/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epu64_mask&expand=878)
30447#[inline]
30448#[target_feature(enable = "avx512f,avx512vl")]
30449#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30450#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30451pub unsafe fn _mm256_mask_cmpge_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
30452 _mm256_cmpge_epu64_mask(a, b) & k1
30453}
30454
30455/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
30456///
30457/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epu64_mask&expand=875)
30458#[inline]
30459#[target_feature(enable = "avx512f,avx512vl")]
30460#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30461#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30462pub unsafe fn _mm_cmpge_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    simd_bitmask::<__m128i, _>(simd_ge(a.as_u64x2(), b.as_u64x2()))
30464}
30465
30466/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30467///
30468/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epu64_mask&expand=876)
30469#[inline]
30470#[target_feature(enable = "avx512f,avx512vl")]
30471#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30472#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30473pub unsafe fn _mm_mask_cmpge_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
30474 _mm_cmpge_epu64_mask(a, b) & k1
30475}
30476
30477/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k.
30478///
30479/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epu64_mask&expand=813)
30480#[inline]
30481#[target_feature(enable = "avx512f")]
30482#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30483#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30484pub unsafe fn _mm512_cmpeq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    simd_bitmask::<__m512i, _>(simd_eq(a.as_u64x8(), b.as_u64x8()))
30486}
30487
30488/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30489///
30490/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epu64_mask&expand=814)
30491#[inline]
30492#[target_feature(enable = "avx512f")]
30493#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30494#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30495pub unsafe fn _mm512_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
30496 _mm512_cmpeq_epu64_mask(a, b) & k1
30497}
30498
30499/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k.
30500///
30501/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epu64_mask&expand=811)
30502#[inline]
30503#[target_feature(enable = "avx512f,avx512vl")]
30504#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30505#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30506pub unsafe fn _mm256_cmpeq_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    simd_bitmask::<__m256i, _>(simd_eq(a.as_u64x4(), b.as_u64x4()))
30508}
30509
30510/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30511///
30512/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epu64_mask&expand=812)
30513#[inline]
30514#[target_feature(enable = "avx512f,avx512vl")]
30515#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30516#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30517pub unsafe fn _mm256_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
30518 _mm256_cmpeq_epu64_mask(a, b) & k1
30519}
30520
30521/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k.
30522///
30523/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epu64_mask&expand=809)
30524#[inline]
30525#[target_feature(enable = "avx512f,avx512vl")]
30526#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30527#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30528pub unsafe fn _mm_cmpeq_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    simd_bitmask::<__m128i, _>(simd_eq(a.as_u64x2(), b.as_u64x2()))
30530}
30531
30532/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30533///
30534/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epu64_mask&expand=810)
30535#[inline]
30536#[target_feature(enable = "avx512f,avx512vl")]
30537#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30538#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30539pub unsafe fn _mm_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
30540 _mm_cmpeq_epu64_mask(a, b) & k1
30541}
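
// Illustrative sketch (not part of the public API): the masked form ANDs the
// fresh comparison with an incoming mask, which makes it easy to chain
// conditions, e.g. "a == b AND c == d" lane by lane.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_chained_eq_epu64(a: __m512i, b: __m512i, c: __m512i, d: __m512i) -> __mmask8 {
    let first = _mm512_cmpeq_epu64_mask(a, b);
    // Lanes already cleared in `first` stay cleared in the final mask.
    _mm512_mask_cmpeq_epu64_mask(first, c, d)
}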
30542
30543/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k.
30544///
30545/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epu64_mask&expand=1118)
30546#[inline]
30547#[target_feature(enable = "avx512f")]
30548#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30549#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30550pub unsafe fn _mm512_cmpneq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    simd_bitmask::<__m512i, _>(simd_ne(a.as_u64x8(), b.as_u64x8()))
30552}
30553
30554/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30555///
30556/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epu64_mask&expand=1119)
30557#[inline]
30558#[target_feature(enable = "avx512f")]
30559#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30560#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30561pub unsafe fn _mm512_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
30562 _mm512_cmpneq_epu64_mask(a, b) & k1
30563}
30564
30565/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k.
30566///
30567/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epu64_mask&expand=1116)
30568#[inline]
30569#[target_feature(enable = "avx512f,avx512vl")]
30570#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30571#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30572pub unsafe fn _mm256_cmpneq_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    simd_bitmask::<__m256i, _>(simd_ne(a.as_u64x4(), b.as_u64x4()))
30574}
30575
30576/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30577///
30578/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epu64_mask&expand=1117)
30579#[inline]
30580#[target_feature(enable = "avx512f,avx512vl")]
30581#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30582#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30583pub unsafe fn _mm256_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
30584 _mm256_cmpneq_epu64_mask(a, b) & k1
30585}
30586
30587/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k.
30588///
30589/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epu64_mask&expand=1114)
30590#[inline]
30591#[target_feature(enable = "avx512f,avx512vl")]
30592#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30593#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30594pub unsafe fn _mm_cmpneq_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    simd_bitmask::<__m128i, _>(simd_ne(a.as_u64x2(), b.as_u64x2()))
30596}
30597
30598/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30599///
30600/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epu64_mask&expand=1115)
30601#[inline]
30602#[target_feature(enable = "avx512f,avx512vl")]
30603#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30604#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30605pub unsafe fn _mm_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
30606 _mm_cmpneq_epu64_mask(a, b) & k1
30607}
30608
30609/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
30610///
30611/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epu64_mask&expand=727)
30612#[inline]
30613#[target_feature(enable = "avx512f")]
30614#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30615#[rustc_legacy_const_generics(2)]
30616#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
30617pub unsafe fn _mm512_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
30618 a: __m512i,
30619 b: __m512i,
30620) -> __mmask8 {
30621 static_assert_uimm_bits!(IMM3, 3);
30622 let neg_one: i8 = -1;
30623 let a: i64x8 = a.as_i64x8();
30624 let b: i64x8 = b.as_i64x8();
    let r: i8 = vpcmpuq(a, b, IMM3, neg_one);
    transmute(r)
30627}
30628
30629/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30630///
30631/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epu64_mask&expand=728)
30632#[inline]
30633#[target_feature(enable = "avx512f")]
30634#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30635#[rustc_legacy_const_generics(3)]
30636#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
30637pub unsafe fn _mm512_mask_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
30638 k1: __mmask8,
30639 a: __m512i,
30640 b: __m512i,
30641) -> __mmask8 {
30642 static_assert_uimm_bits!(IMM3, 3);
30643 let a: i64x8 = a.as_i64x8();
30644 let b: i64x8 = b.as_i64x8();
    let r: i8 = vpcmpuq(a, b, IMM3, k1 as i8);
    transmute(r)
30647}
30648
30649/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
30650///
30651/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epu64_mask&expand=725)
30652#[inline]
30653#[target_feature(enable = "avx512f,avx512vl")]
30654#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30655#[rustc_legacy_const_generics(2)]
30656#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
30657pub unsafe fn _mm256_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
30658 a: __m256i,
30659 b: __m256i,
30660) -> __mmask8 {
30661 static_assert_uimm_bits!(IMM3, 3);
30662 let neg_one: i8 = -1;
30663 let a: i64x4 = a.as_i64x4();
30664 let b: i64x4 = b.as_i64x4();
    let r: i8 = vpcmpuq256(a, b, IMM3, neg_one);
    transmute(r)
30667}
30668
30669/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30670///
30671/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epu64_mask&expand=726)
30672#[inline]
30673#[target_feature(enable = "avx512f,avx512vl")]
30674#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30675#[rustc_legacy_const_generics(3)]
30676#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
30677pub unsafe fn _mm256_mask_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
30678 k1: __mmask8,
30679 a: __m256i,
30680 b: __m256i,
30681) -> __mmask8 {
30682 static_assert_uimm_bits!(IMM3, 3);
30683 let a: i64x4 = a.as_i64x4();
30684 let b: i64x4 = b.as_i64x4();
    let r: i8 = vpcmpuq256(a, b, IMM3, k1 as i8);
    transmute(r)
30687}
30688
30689/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
30690///
30691/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epu64_mask&expand=723)
30692#[inline]
30693#[target_feature(enable = "avx512f,avx512vl")]
30694#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30695#[rustc_legacy_const_generics(2)]
30696#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
30697pub unsafe fn _mm_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
30698 static_assert_uimm_bits!(IMM3, 3);
30699 let neg_one: i8 = -1;
30700 let a: i64x2 = a.as_i64x2();
30701 let b: i64x2 = b.as_i64x2();
    let r: i8 = vpcmpuq128(a, b, IMM3, neg_one);
    transmute(r)
30704}
30705
30706/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30707///
30708/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epu64_mask&expand=724)
30709#[inline]
30710#[target_feature(enable = "avx512f,avx512vl")]
30711#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30712#[rustc_legacy_const_generics(3)]
30713#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
30714pub unsafe fn _mm_mask_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
30715 k1: __mmask8,
30716 a: __m128i,
30717 b: __m128i,
30718) -> __mmask8 {
30719 static_assert_uimm_bits!(IMM3, 3);
30720 let a: i64x2 = a.as_i64x2();
30721 let b: i64x2 = b.as_i64x2();
    let r: i8 = vpcmpuq128(a, b, IMM3, k1 as i8);
    transmute(r)
30724}
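
// Illustrative sketch (not part of the public API): a half-open range check
// `lo <= x < hi` on unsigned 64-bit lanes, built from two predicated compares.
// Assumes the `_MM_CMPINT_NLT` ("not less than", i.e. >=) and `_MM_CMPINT_LT`
// constants defined elsewhere in this crate.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_in_range_epu64(x: __m512i, lo: __m512i, hi: __m512i) -> __mmask8 {
    let ge_lo = _mm512_cmp_epu64_mask::<_MM_CMPINT_NLT>(x, lo);
    // Feed the first mask in as the zeromask so only lanes with x >= lo survive.
    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(ge_lo, x, hi)
}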
30725
30726/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k.
30727///
30728/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epi64_mask&expand=1037)
30729#[inline]
30730#[target_feature(enable = "avx512f")]
30731#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30732#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
30733pub unsafe fn _mm512_cmplt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    simd_bitmask::<__m512i, _>(simd_lt(a.as_i64x8(), b.as_i64x8()))
30735}
30736
30737/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30738///
30739/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epi64_mask&expand=1038)
30740#[inline]
30741#[target_feature(enable = "avx512f")]
30742#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30743#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
30744pub unsafe fn _mm512_mask_cmplt_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
30745 _mm512_cmplt_epi64_mask(a, b) & k1
30746}
30747
30748/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k.
30749///
30750/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epi64_mask&expand=1035)
30751#[inline]
30752#[target_feature(enable = "avx512f,avx512vl")]
30753#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30754#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
30755pub unsafe fn _mm256_cmplt_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    simd_bitmask::<__m256i, _>(simd_lt(a.as_i64x4(), b.as_i64x4()))
30757}
30758
30759/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30760///
30761/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epi64_mask&expand=1036)
30762#[inline]
30763#[target_feature(enable = "avx512f,avx512vl")]
30764#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30765#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
30766pub unsafe fn _mm256_mask_cmplt_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
30767 _mm256_cmplt_epi64_mask(a, b) & k1
30768}
30769
30770/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k.
30771///
30772/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi64_mask&expand=1033)
30773#[inline]
30774#[target_feature(enable = "avx512f,avx512vl")]
30775#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30776#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
30777pub unsafe fn _mm_cmplt_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    simd_bitmask::<__m128i, _>(simd_lt(a.as_i64x2(), b.as_i64x2()))
30779}
30780
30781/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30782///
30783/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epi64_mask&expand=1034)
30784#[inline]
30785#[target_feature(enable = "avx512f,avx512vl")]
30786#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30787#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
30788pub unsafe fn _mm_mask_cmplt_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
30789 _mm_cmplt_epi64_mask(a, b) & k1
30790}
30791
30792/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k.
30793///
30794/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epi64_mask&expand=913)
30795#[inline]
30796#[target_feature(enable = "avx512f")]
30797#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30798#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
30799pub unsafe fn _mm512_cmpgt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    simd_bitmask::<__m512i, _>(simd_gt(a.as_i64x8(), b.as_i64x8()))
30801}
30802
30803/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30804///
30805/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epi64_mask&expand=914)
30806#[inline]
30807#[target_feature(enable = "avx512f")]
30808#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30809#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
30810pub unsafe fn _mm512_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
30811 _mm512_cmpgt_epi64_mask(a, b) & k1
30812}
30813
30814/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k.
30815///
30816/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi64_mask&expand=911)
30817#[inline]
30818#[target_feature(enable = "avx512f,avx512vl")]
30819#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30820#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
30821pub unsafe fn _mm256_cmpgt_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    simd_bitmask::<__m256i, _>(simd_gt(a.as_i64x4(), b.as_i64x4()))
30823}
30824
30825/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30826///
30827/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epi64_mask&expand=912)
30828#[inline]
30829#[target_feature(enable = "avx512f,avx512vl")]
30830#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30831#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
30832pub unsafe fn _mm256_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
30833 _mm256_cmpgt_epi64_mask(a, b) & k1
30834}
30835
30836/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k.
30837///
30838/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi64_mask&expand=909)
30839#[inline]
30840#[target_feature(enable = "avx512f,avx512vl")]
30841#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30842#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
30843pub unsafe fn _mm_cmpgt_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    simd_bitmask::<__m128i, _>(simd_gt(a.as_i64x2(), b.as_i64x2()))
30845}
30846
30847/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30848///
30849/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epi64_mask&expand=910)
30850#[inline]
30851#[target_feature(enable = "avx512f,avx512vl")]
30852#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30853#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
30854pub unsafe fn _mm_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
30855 _mm_cmpgt_epi64_mask(a, b) & k1
30856}
30857
30858/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
30859///
30860/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epi64_mask&expand=977)
30861#[inline]
30862#[target_feature(enable = "avx512f")]
30863#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30864#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
30865pub unsafe fn _mm512_cmple_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    simd_bitmask::<__m512i, _>(simd_le(a.as_i64x8(), b.as_i64x8()))
30867}
30868
30869/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30870///
30871/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epi64_mask&expand=978)
30872#[inline]
30873#[target_feature(enable = "avx512f")]
30874#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30875#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
30876pub unsafe fn _mm512_mask_cmple_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
30877 _mm512_cmple_epi64_mask(a, b) & k1
30878}
30879
30880/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
30881///
30882/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epi64_mask&expand=975)
30883#[inline]
30884#[target_feature(enable = "avx512f,avx512vl")]
30885#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30886#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
30887pub unsafe fn _mm256_cmple_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    simd_bitmask::<__m256i, _>(simd_le(a.as_i64x4(), b.as_i64x4()))
30889}
30890
30891/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30892///
30893/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epi64_mask&expand=976)
30894#[inline]
30895#[target_feature(enable = "avx512f,avx512vl")]
30896#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30897#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
30898pub unsafe fn _mm256_mask_cmple_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
30899 _mm256_cmple_epi64_mask(a, b) & k1
30900}
30901
30902/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
30903///
30904/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epi64_mask&expand=973)
30905#[inline]
30906#[target_feature(enable = "avx512f,avx512vl")]
30907#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30908#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
30909pub unsafe fn _mm_cmple_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    simd_bitmask::<__m128i, _>(simd_le(a.as_i64x2(), b.as_i64x2()))
30911}
30912
30913/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30914///
30915/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epi64_mask&expand=974)
30916#[inline]
30917#[target_feature(enable = "avx512f,avx512vl")]
30918#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30919#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
30920pub unsafe fn _mm_mask_cmple_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
30921 _mm_cmple_epi64_mask(a, b) & k1
30922}
30923
30924/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
30925///
30926/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epi64_mask&expand=855)
30927#[inline]
30928#[target_feature(enable = "avx512f")]
30929#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30930#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
30931pub unsafe fn _mm512_cmpge_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    simd_bitmask::<__m512i, _>(simd_ge(a.as_i64x8(), b.as_i64x8()))
30933}
30934
30935/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30936///
30937/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epi64_mask&expand=856)
30938#[inline]
30939#[target_feature(enable = "avx512f")]
30940#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30941#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
30942pub unsafe fn _mm512_mask_cmpge_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
30943 _mm512_cmpge_epi64_mask(a, b) & k1
30944}
30945
30946/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
30947///
30948/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epi64_mask&expand=853)
30949#[inline]
30950#[target_feature(enable = "avx512f,avx512vl")]
30951#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30952#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
30953pub unsafe fn _mm256_cmpge_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    simd_bitmask::<__m256i, _>(simd_ge(a.as_i64x4(), b.as_i64x4()))
30955}
30956
30957/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30958///
30959/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epi64_mask&expand=854)
30960#[inline]
30961#[target_feature(enable = "avx512f,avx512vl")]
30962#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30963#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
30964pub unsafe fn _mm256_mask_cmpge_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
30965 _mm256_cmpge_epi64_mask(a, b) & k1
30966}
30967
30968/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
30969///
30970/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epi64_mask&expand=851)
30971#[inline]
30972#[target_feature(enable = "avx512f,avx512vl")]
30973#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30974#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
30975pub unsafe fn _mm_cmpge_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    simd_bitmask::<__m128i, _>(simd_ge(a.as_i64x2(), b.as_i64x2()))
30977}
30978
30979/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30980///
30981/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epi64_mask&expand=852)
30982#[inline]
30983#[target_feature(enable = "avx512f,avx512vl")]
30984#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30985#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
30986pub unsafe fn _mm_mask_cmpge_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
30987 _mm_cmpge_epi64_mask(a, b) & k1
30988}
30989
30990/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k.
30991///
30992/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epi64_mask&expand=787)
30993#[inline]
30994#[target_feature(enable = "avx512f")]
30995#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30996#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
30997pub unsafe fn _mm512_cmpeq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    simd_bitmask::<__m512i, _>(simd_eq(a.as_i64x8(), b.as_i64x8()))
30999}
31000
31001/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31002///
31003/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epi64_mask&expand=788)
31004#[inline]
31005#[target_feature(enable = "avx512f")]
31006#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31007#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
31008pub unsafe fn _mm512_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
31009 _mm512_cmpeq_epi64_mask(a, b) & k1
31010}
31011
31012/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k.
31013///
31014/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi64_mask&expand=785)
31015#[inline]
31016#[target_feature(enable = "avx512f,avx512vl")]
31017#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31018#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
31019pub unsafe fn _mm256_cmpeq_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    simd_bitmask::<__m256i, _>(simd_eq(a.as_i64x4(), b.as_i64x4()))
31021}
31022
31023/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31024///
31025/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epi64_mask&expand=786)
31026#[inline]
31027#[target_feature(enable = "avx512f,avx512vl")]
31028#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31029#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
31030pub unsafe fn _mm256_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31031 _mm256_cmpeq_epi64_mask(a, b) & k1
31032}
31033
31034/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k.
31035///
31036/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi64_mask&expand=783)
31037#[inline]
31038#[target_feature(enable = "avx512f,avx512vl")]
31039#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31040#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
31041pub unsafe fn _mm_cmpeq_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    simd_bitmask::<__m128i, _>(simd_eq(a.as_i64x2(), b.as_i64x2()))
31043}
31044
31045/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31046///
31047/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epi64_mask&expand=784)
31048#[inline]
31049#[target_feature(enable = "avx512f,avx512vl")]
31050#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31051#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
31052pub unsafe fn _mm_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31053 _mm_cmpeq_epi64_mask(a, b) & k1
31054}
31055
31056/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k.
31057///
31058/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epi64_mask&expand=1094)
31059#[inline]
31060#[target_feature(enable = "avx512f")]
31061#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31062#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
31063pub unsafe fn _mm512_cmpneq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    simd_bitmask::<__m512i, _>(simd_ne(a.as_i64x8(), b.as_i64x8()))
31065}
31066
31067/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31068///
31069/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epi64_mask&expand=1095)
31070#[inline]
31071#[target_feature(enable = "avx512f")]
31072#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31073#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
31074pub unsafe fn _mm512_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
31075 _mm512_cmpneq_epi64_mask(a, b) & k1
31076}
31077
31078/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k.
31079///
31080/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epi64_mask&expand=1092)
31081#[inline]
31082#[target_feature(enable = "avx512f,avx512vl")]
31083#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31084#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
31085pub unsafe fn _mm256_cmpneq_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    simd_bitmask::<__m256i, _>(simd_ne(a.as_i64x4(), b.as_i64x4()))
31087}
31088
31089/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31090///
31091/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epi64_mask&expand=1093)
31092#[inline]
31093#[target_feature(enable = "avx512f,avx512vl")]
31094#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31095#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
31096pub unsafe fn _mm256_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31097 _mm256_cmpneq_epi64_mask(a, b) & k1
31098}
31099
31100/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k.
31101///
31102/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epi64_mask&expand=1090)
31103#[inline]
31104#[target_feature(enable = "avx512f,avx512vl")]
31105#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31106#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
31107pub unsafe fn _mm_cmpneq_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    simd_bitmask::<__m128i, _>(simd_ne(a.as_i64x2(), b.as_i64x2()))
31109}
31110
31111/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31112///
31113/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epi64_mask&expand=1091)
31114#[inline]
31115#[target_feature(enable = "avx512f,avx512vl")]
31116#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31117#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
31118pub unsafe fn _mm_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31119 _mm_cmpneq_epi64_mask(a, b) & k1
31120}
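
// Illustrative sketch, not part of the original module: over the lanes that exist, the
// not-equal mask is the complement of the equal mask; for the two-lane 128-bit form only
// the low two mask bits are meaningful.
#[cfg(test)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn _example_neq_is_complement_of_eq(a: __m128i, b: __m128i) -> bool {
    _mm_cmpneq_epi64_mask(a, b) == !_mm_cmpeq_epi64_mask(a, b) & 0b11
}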
31121
31122/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
31123///
31124/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epi64_mask&expand=703)
31125#[inline]
31126#[target_feature(enable = "avx512f")]
31127#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31128#[rustc_legacy_const_generics(2)]
31129#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31130pub unsafe fn _mm512_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
31131 a: __m512i,
31132 b: __m512i,
31133) -> __mmask8 {
31134 static_assert_uimm_bits!(IMM3, 3);
31135 let neg_one: i8 = -1;
31136 let a: i64x8 = a.as_i64x8();
31137 let b: i64x8 = b.as_i64x8();
    let r: i8 = vpcmpq(a, b, IMM3, neg_one);
    transmute(r)
31140}
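
// Illustrative sketch, not part of the original module: the predicate-generic compare
// subsumes the fixed-predicate intrinsics above; with `_MM_CMPINT_LE` it behaves like
// `_mm512_cmple_epi64_mask`.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_cmple_via_generic_cmp(a: __m512i, b: __m512i) -> __mmask8 {
    _mm512_cmp_epi64_mask::<_MM_CMPINT_LE>(a, b)
}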
31141
31142/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31143///
31144/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epi64_mask&expand=704)
31145#[inline]
31146#[target_feature(enable = "avx512f")]
31147#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31148#[rustc_legacy_const_generics(3)]
31149#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31150pub unsafe fn _mm512_mask_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
31151 k1: __mmask8,
31152 a: __m512i,
31153 b: __m512i,
31154) -> __mmask8 {
31155 static_assert_uimm_bits!(IMM3, 3);
31156 let a: i64x8 = a.as_i64x8();
31157 let b: i64x8 = b.as_i64x8();
    let r: i8 = vpcmpq(a, b, IMM3, k1 as i8);
    transmute(r)
31160}
31161
31162/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
31163///
31164/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epi64_mask&expand=701)
31165#[inline]
31166#[target_feature(enable = "avx512f,avx512vl")]
31167#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31168#[rustc_legacy_const_generics(2)]
31169#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31170pub unsafe fn _mm256_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
31171 a: __m256i,
31172 b: __m256i,
31173) -> __mmask8 {
31174 static_assert_uimm_bits!(IMM3, 3);
31175 let neg_one: i8 = -1;
31176 let a: i64x4 = a.as_i64x4();
31177 let b: i64x4 = b.as_i64x4();
    let r: i8 = vpcmpq256(a, b, IMM3, neg_one);
    transmute(r)
31180}
31181
31182/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31183///
31184/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epi64_mask&expand=702)
31185#[inline]
31186#[target_feature(enable = "avx512f,avx512vl")]
31187#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31188#[rustc_legacy_const_generics(3)]
31189#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31190pub unsafe fn _mm256_mask_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
31191 k1: __mmask8,
31192 a: __m256i,
31193 b: __m256i,
31194) -> __mmask8 {
31195 static_assert_uimm_bits!(IMM3, 3);
31196 let a: i64x4 = a.as_i64x4();
31197 let b: i64x4 = b.as_i64x4();
    let r: i8 = vpcmpq256(a, b, IMM3, k1 as i8);
    transmute(r)
31200}
31201
31202/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
31203///
31204/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epi64_mask&expand=699)
31205#[inline]
31206#[target_feature(enable = "avx512f,avx512vl")]
31207#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31208#[rustc_legacy_const_generics(2)]
31209#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31210pub unsafe fn _mm_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
31211 static_assert_uimm_bits!(IMM3, 3);
31212 let neg_one: i8 = -1;
31213 let a: i64x2 = a.as_i64x2();
31214 let b: i64x2 = b.as_i64x2();
    let r: i8 = vpcmpq128(a, b, IMM3, neg_one);
    transmute(r)
31217}
31218
31219/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31220///
31221/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epi64_mask&expand=700)
31222#[inline]
31223#[target_feature(enable = "avx512f,avx512vl")]
31224#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31225#[rustc_legacy_const_generics(3)]
31226#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31227pub unsafe fn _mm_mask_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
31228 k1: __mmask8,
31229 a: __m128i,
31230 b: __m128i,
31231) -> __mmask8 {
31232 static_assert_uimm_bits!(IMM3, 3);
31233 let a: i64x2 = a.as_i64x2();
31234 let b: i64x2 = b.as_i64x2();
    let r: i8 = vpcmpq128(a, b, IMM3, k1 as i8);
    transmute(r)
31237}
31238
31239/// Reduce the packed 32-bit integers in a by addition. Returns the sum of all elements in a.
31240///
31241/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_epi32&expand=4556)
31242#[inline]
31243#[target_feature(enable = "avx512f")]
31244#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31245pub unsafe fn _mm512_reduce_add_epi32(a: __m512i) -> i32 {
31246 simd_reduce_add_unordered(a.as_i32x16())
31247}
31248
31249/// Reduce the packed 32-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
31250///
31251/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_epi32&expand=4555)
31252#[inline]
31253#[target_feature(enable = "avx512f")]
31254#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31255pub unsafe fn _mm512_mask_reduce_add_epi32(k: __mmask16, a: __m512i) -> i32 {
31256 simd_reduce_add_unordered(simd_select_bitmask(
        k,
        a.as_i32x16(),
        _mm512_setzero_si512().as_i32x16(),
31260 ))
31261}
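
// Illustrative sketch, not part of the original module: masking off lanes before the
// reduction gives the same sum as the masked reduction above, since inactive lanes are
// replaced with the additive identity 0.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_masked_sum_epi32(k: __mmask16, a: __m512i) -> i32 {
    _mm512_reduce_add_epi32(_mm512_maskz_mov_epi32(k, a))
}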
31262
31263/// Reduce the packed 64-bit integers in a by addition. Returns the sum of all elements in a.
31264///
31265/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_epi64&expand=4558)
31266#[inline]
31267#[target_feature(enable = "avx512f")]
31268#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31269pub unsafe fn _mm512_reduce_add_epi64(a: __m512i) -> i64 {
31270 simd_reduce_add_unordered(a.as_i64x8())
31271}
31272
31273/// Reduce the packed 64-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
31274///
31275/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_epi64&expand=4557)
31276#[inline]
31277#[target_feature(enable = "avx512f")]
31278#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31279pub unsafe fn _mm512_mask_reduce_add_epi64(k: __mmask8, a: __m512i) -> i64 {
31280 simd_reduce_add_unordered(simd_select_bitmask(
        k,
        a.as_i64x8(),
        _mm512_setzero_si512().as_i64x8(),
31284 ))
31285}
31286
31287/// Reduce the packed single-precision (32-bit) floating-point elements in a by addition. Returns the sum of all elements in a.
31288///
31289/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_ps&expand=4562)
31290#[inline]
31291#[target_feature(enable = "avx512f")]
31292#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31293pub unsafe fn _mm512_reduce_add_ps(a: __m512) -> f32 {
31294 simd_reduce_add_unordered(a.as_f32x16())
31295}
31296
31297/// Reduce the packed single-precision (32-bit) floating-point elements in a by addition using mask k. Returns the sum of all active elements in a.
31298///
31299/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_ps&expand=4561)
31300#[inline]
31301#[target_feature(enable = "avx512f")]
31302#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31303pub unsafe fn _mm512_mask_reduce_add_ps(k: __mmask16, a: __m512) -> f32 {
31304 simd_reduce_add_unordered(simd_select_bitmask(
        k,
        a.as_f32x16(),
        _mm512_setzero_ps().as_f32x16(),
31308 ))
31309}
31310
31311/// Reduce the packed double-precision (64-bit) floating-point elements in a by addition. Returns the sum of all elements in a.
31312///
31313/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_pd&expand=4560)
31314#[inline]
31315#[target_feature(enable = "avx512f")]
31316#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31317pub unsafe fn _mm512_reduce_add_pd(a: __m512d) -> f64 {
31318 simd_reduce_add_unordered(a.as_f64x8())
31319}
31320
31321/// Reduce the packed double-precision (64-bit) floating-point elements in a by addition using mask k. Returns the sum of all active elements in a.
31322///
31323/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_pd&expand=4559)
31324#[inline]
31325#[target_feature(enable = "avx512f")]
31326#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31327pub unsafe fn _mm512_mask_reduce_add_pd(k: __mmask8, a: __m512d) -> f64 {
31328 simd_reduce_add_unordered(simd_select_bitmask(
        k,
        a.as_f64x8(),
        _mm512_setzero_pd().as_f64x8(),
31332 ))
31333}
31334
31335/// Reduce the packed 32-bit integers in a by multiplication. Returns the product of all elements in a.
31336///
31337/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_epi32&expand=4600)
31338#[inline]
31339#[target_feature(enable = "avx512f")]
31340#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31341pub unsafe fn _mm512_reduce_mul_epi32(a: __m512i) -> i32 {
31342 simd_reduce_mul_unordered(a.as_i32x16())
31343}
31344
31345/// Reduce the packed 32-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
31346///
31347/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_epi32&expand=4599)
31348#[inline]
31349#[target_feature(enable = "avx512f")]
31350#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31351pub unsafe fn _mm512_mask_reduce_mul_epi32(k: __mmask16, a: __m512i) -> i32 {
31352 simd_reduce_mul_unordered(simd_select_bitmask(
        k,
        a.as_i32x16(),
        _mm512_set1_epi32(1).as_i32x16(),
31356 ))
31357}
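
// Illustrative sketch, not part of the original module: for a masked product the inactive
// lanes must be replaced with the multiplicative identity 1 (not 0), otherwise a single
// masked-off lane would force the whole product to 0; this mirrors the
// `_mm512_set1_epi32(1)` fallback used above.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_masked_product_epi32(k: __mmask16, a: __m512i) -> i32 {
    _mm512_reduce_mul_epi32(_mm512_mask_mov_epi32(_mm512_set1_epi32(1), k, a))
}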
31358
31359/// Reduce the packed 64-bit integers in a by multiplication. Returns the product of all elements in a.
31360///
31361/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_epi64&expand=4602)
31362#[inline]
31363#[target_feature(enable = "avx512f")]
31364#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31365pub unsafe fn _mm512_reduce_mul_epi64(a: __m512i) -> i64 {
31366 simd_reduce_mul_unordered(a.as_i64x8())
31367}
31368
31369/// Reduce the packed 64-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
31370///
31371/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_epi64&expand=4601)
31372#[inline]
31373#[target_feature(enable = "avx512f")]
31374#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31375pub unsafe fn _mm512_mask_reduce_mul_epi64(k: __mmask8, a: __m512i) -> i64 {
31376 simd_reduce_mul_unordered(simd_select_bitmask(
        k,
        a.as_i64x8(),
        _mm512_set1_epi64(1).as_i64x8(),
31380 ))
31381}
31382
31383/// Reduce the packed single-precision (32-bit) floating-point elements in a by multiplication. Returns the product of all elements in a.
31384///
31385/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_ps&expand=4606)
31386#[inline]
31387#[target_feature(enable = "avx512f")]
31388#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31389pub unsafe fn _mm512_reduce_mul_ps(a: __m512) -> f32 {
31390 simd_reduce_mul_unordered(a.as_f32x16())
31391}
31392
31393/// Reduce the packed single-precision (32-bit) floating-point elements in a by multiplication using mask k. Returns the product of all active elements in a.
31394///
31395/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_ps&expand=4605)
31396#[inline]
31397#[target_feature(enable = "avx512f")]
31398#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31399pub unsafe fn _mm512_mask_reduce_mul_ps(k: __mmask16, a: __m512) -> f32 {
31400 simd_reduce_mul_unordered(simd_select_bitmask(
        k,
        a.as_f32x16(),
        _mm512_set1_ps(1.).as_f32x16(),
31404 ))
31405}
31406
31407/// Reduce the packed double-precision (64-bit) floating-point elements in a by multiplication. Returns the product of all elements in a.
31408///
31409/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_pd&expand=4604)
31410#[inline]
31411#[target_feature(enable = "avx512f")]
31412#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31413pub unsafe fn _mm512_reduce_mul_pd(a: __m512d) -> f64 {
31414 simd_reduce_mul_unordered(a.as_f64x8())
31415}
31416
31417/// Reduce the packed double-precision (64-bit) floating-point elements in a by multiplication using mask k. Returns the product of all active elements in a.
31418///
31419/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_pd&expand=4603)
31420#[inline]
31421#[target_feature(enable = "avx512f")]
31422#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31423pub unsafe fn _mm512_mask_reduce_mul_pd(k: __mmask8, a: __m512d) -> f64 {
31424 simd_reduce_mul_unordered(simd_select_bitmask(
        k,
        a.as_f64x8(),
        _mm512_set1_pd(1.).as_f64x8(),
31428 ))
31429}
31430
31431/// Reduce the packed signed 32-bit integers in a by maximum. Returns the maximum of all elements in a.
31432///
31433/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epi32&expand=4576)
31434#[inline]
31435#[target_feature(enable = "avx512f")]
31436#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31437pub unsafe fn _mm512_reduce_max_epi32(a: __m512i) -> i32 {
31438 simd_reduce_max(a.as_i32x16())
31439}
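
// Illustrative sketch, not part of the original module: the horizontal reductions return a
// scalar, so broadcasting the result back gives a vector where every lane holds the maximum.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_broadcast_max_epi32(a: __m512i) -> __m512i {
    _mm512_set1_epi32(_mm512_reduce_max_epi32(a))
}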
31440
31441/// Reduce the packed signed 32-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
31442///
31443/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_epi32&expand=4575)
31444#[inline]
31445#[target_feature(enable = "avx512f")]
31446#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31447pub unsafe fn _mm512_mask_reduce_max_epi32(k: __mmask16, a: __m512i) -> i32 {
31448 simd_reduce_max(simd_select_bitmask(
        k,
        a.as_i32x16(),
        _mm512_undefined_epi32().as_i32x16(),
31452 ))
31453}
31454
31455/// Reduce the packed signed 64-bit integers in a by maximum. Returns the maximum of all elements in a.
31456///
31457/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epi64&expand=4578)
31458#[inline]
31459#[target_feature(enable = "avx512f")]
31460#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31461pub unsafe fn _mm512_reduce_max_epi64(a: __m512i) -> i64 {
31462 simd_reduce_max(a.as_i64x8())
31463}
31464
31465/// Reduce the packed signed 64-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
31466///
31467/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_epi64&expand=4577)
31468#[inline]
31469#[target_feature(enable = "avx512f")]
31470#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31471pub unsafe fn _mm512_mask_reduce_max_epi64(k: __mmask8, a: __m512i) -> i64 {
31472 simd_reduce_max(simd_select_bitmask(
        k,
        a.as_i64x8(),
        _mm512_set1_epi64(0).as_i64x8(),
31476 ))
31477}
31478
31479/// Reduce the packed unsigned 32-bit integers in a by maximum. Returns the maximum of all elements in a.
31480///
31481/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epu32&expand=4580)
31482#[inline]
31483#[target_feature(enable = "avx512f")]
31484#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31485pub unsafe fn _mm512_reduce_max_epu32(a: __m512i) -> u32 {
31486 simd_reduce_max(a.as_u32x16())
31487}
31488
31489/// Reduce the packed unsigned 32-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
31490///
31491/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_epu32&expand=4579)
31492#[inline]
31493#[target_feature(enable = "avx512f")]
31494#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31495pub unsafe fn _mm512_mask_reduce_max_epu32(k: __mmask16, a: __m512i) -> u32 {
31496 simd_reduce_max(simd_select_bitmask(
        k,
        a.as_u32x16(),
        _mm512_undefined_epi32().as_u32x16(),
31500 ))
31501}
31502
31503/// Reduce the packed unsigned 64-bit integers in a by maximum. Returns the maximum of all elements in a.
31504///
31505/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epu64&expand=4582)
31506#[inline]
31507#[target_feature(enable = "avx512f")]
31508#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31509pub unsafe fn _mm512_reduce_max_epu64(a: __m512i) -> u64 {
31510 simd_reduce_max(a.as_u64x8())
31511}
31512
31513/// Reduce the packed unsigned 64-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
31514///
31515/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_epu64&expand=4581)
31516#[inline]
31517#[target_feature(enable = "avx512f")]
31518#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31519pub unsafe fn _mm512_mask_reduce_max_epu64(k: __mmask8, a: __m512i) -> u64 {
31520 simd_reduce_max(simd_select_bitmask(
        k,
        a.as_u64x8(),
        _mm512_set1_epi64(0).as_u64x8(),
31524 ))
31525}
31526
31527/// Reduce the packed single-precision (32-bit) floating-point elements in a by maximum. Returns the maximum of all elements in a.
31528///
31529/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_ps&expand=4586)
31530#[inline]
31531#[target_feature(enable = "avx512f")]
31532#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31533pub unsafe fn _mm512_reduce_max_ps(a: __m512) -> f32 {
31534 simd_reduce_max(a.as_f32x16())
31535}
31536
31537/// Reduce the packed single-precision (32-bit) floating-point elements in a by maximum using mask k. Returns the maximum of all active elements in a.
31538///
31539/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_ps&expand=4585)
31540#[inline]
31541#[target_feature(enable = "avx512f")]
31542#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31543pub unsafe fn _mm512_mask_reduce_max_ps(k: __mmask16, a: __m512) -> f32 {
31544 simd_reduce_max(simd_select_bitmask(
        k,
        a.as_f32x16(),
        _mm512_undefined_ps().as_f32x16(),
31548 ))
31549}
31550
31551/// Reduce the packed double-precision (64-bit) floating-point elements in a by maximum. Returns the maximum of all elements in a.
31552///
31553/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_pd&expand=4584)
31554#[inline]
31555#[target_feature(enable = "avx512f")]
31556#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31557pub unsafe fn _mm512_reduce_max_pd(a: __m512d) -> f64 {
31558 simd_reduce_max(a.as_f64x8())
31559}
31560
31561/// Reduce the packed double-precision (64-bit) floating-point elements in a by maximum using mask k. Returns the maximum of all active elements in a.
31562///
31563/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_pd&expand=4583)
31564#[inline]
31565#[target_feature(enable = "avx512f")]
31566#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31567pub unsafe fn _mm512_mask_reduce_max_pd(k: __mmask8, a: __m512d) -> f64 {
31568 simd_reduce_max(simd_select_bitmask(
        k,
        a.as_f64x8(),
        _mm512_undefined_pd().as_f64x8(),
31572 ))
31573}
31574
31575/// Reduce the packed signed 32-bit integers in a by minimum. Returns the minimum of all elements in a.
31576///
31577/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_epi32&expand=4588)
31578#[inline]
31579#[target_feature(enable = "avx512f")]
31580#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31581pub unsafe fn _mm512_reduce_min_epi32(a: __m512i) -> i32 {
31582 simd_reduce_min(a.as_i32x16())
31583}
31584
/// Reduce the packed signed 32-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
31586///
31587/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_epi32&expand=4587)
31588#[inline]
31589#[target_feature(enable = "avx512f")]
31590#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31591pub unsafe fn _mm512_mask_reduce_min_epi32(k: __mmask16, a: __m512i) -> i32 {
31592 simd_reduce_min(simd_select_bitmask(
        k,
        a.as_i32x16(),
        _mm512_undefined_epi32().as_i32x16(),
31596 ))
31597}
31598
31599/// Reduce the packed signed 64-bit integers in a by minimum. Returns the minimum of all elements in a.
31600///
31601/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_epi64&expand=4590)
31602#[inline]
31603#[target_feature(enable = "avx512f")]
31604#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31605pub unsafe fn _mm512_reduce_min_epi64(a: __m512i) -> i64 {
31606 simd_reduce_min(a.as_i64x8())
31607}
31608
/// Reduce the packed signed 64-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
31610///
31611/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_epi64&expand=4589)
31612#[inline]
31613#[target_feature(enable = "avx512f")]
31614#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31615pub unsafe fn _mm512_mask_reduce_min_epi64(k: __mmask8, a: __m512i) -> i64 {
31616 simd_reduce_min(simd_select_bitmask(
        k,
        a.as_i64x8(),
        _mm512_set1_epi64(0).as_i64x8(),
31620 ))
31621}
31622
31623/// Reduce the packed unsigned 32-bit integers in a by minimum. Returns the minimum of all elements in a.
31624///
31625/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_epu32&expand=4592)
31626#[inline]
31627#[target_feature(enable = "avx512f")]
31628#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31629pub unsafe fn _mm512_reduce_min_epu32(a: __m512i) -> u32 {
31630 simd_reduce_min(a.as_u32x16())
31631}
31632
/// Reduce the packed unsigned 32-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
31634///
31635/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_epu32&expand=4591)
31636#[inline]
31637#[target_feature(enable = "avx512f")]
31638#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31639pub unsafe fn _mm512_mask_reduce_min_epu32(k: __mmask16, a: __m512i) -> u32 {
31640 simd_reduce_min(simd_select_bitmask(
        k,
        a.as_u32x16(),
        _mm512_undefined_epi32().as_u32x16(),
31644 ))
31645}
31646
31647/// Reduce the packed unsigned 64-bit integers in a by minimum. Returns the minimum of all elements in a.
31648///
31649/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_epu64&expand=4594)
31650#[inline]
31651#[target_feature(enable = "avx512f")]
31652#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31653pub unsafe fn _mm512_reduce_min_epu64(a: __m512i) -> u64 {
31654 simd_reduce_min(a.as_u64x8())
31655}
31656
/// Reduce the packed unsigned 64-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_epu64&expand=4593)
31660#[inline]
31661#[target_feature(enable = "avx512f")]
31662#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31663pub unsafe fn _mm512_mask_reduce_min_epu64(k: __mmask8, a: __m512i) -> u64 {
31664 simd_reduce_min(simd_select_bitmask(
        k,
        a.as_u64x8(),
        _mm512_set1_epi64(0).as_u64x8(),
31668 ))
31669}
31670
31671/// Reduce the packed single-precision (32-bit) floating-point elements in a by minimum. Returns the minimum of all elements in a.
31672///
31673/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_ps&expand=4598)
31674#[inline]
31675#[target_feature(enable = "avx512f")]
31676#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31677pub unsafe fn _mm512_reduce_min_ps(a: __m512) -> f32 {
31678 simd_reduce_min(a.as_f32x16())
31679}
31680
/// Reduce the packed single-precision (32-bit) floating-point elements in a by minimum using mask k. Returns the minimum of all active elements in a.
31682///
31683/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_ps&expand=4597)
31684#[inline]
31685#[target_feature(enable = "avx512f")]
31686#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31687pub unsafe fn _mm512_mask_reduce_min_ps(k: __mmask16, a: __m512) -> f32 {
31688 simd_reduce_min(simd_select_bitmask(
        k,
        a.as_f32x16(),
        _mm512_undefined_ps().as_f32x16(),
31692 ))
31693}
31694
31695/// Reduce the packed double-precision (64-bit) floating-point elements in a by minimum. Returns the minimum of all elements in a.
31696///
31697/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_pd&expand=4596)
31698#[inline]
31699#[target_feature(enable = "avx512f")]
31700#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31701pub unsafe fn _mm512_reduce_min_pd(a: __m512d) -> f64 {
31702 simd_reduce_min(a.as_f64x8())
31703}
31704
/// Reduce the packed double-precision (64-bit) floating-point elements in a by minimum using mask k. Returns the minimum of all active elements in a.
31706///
31707/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_pd&expand=4595)
31708#[inline]
31709#[target_feature(enable = "avx512f")]
31710#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31711pub unsafe fn _mm512_mask_reduce_min_pd(k: __mmask8, a: __m512d) -> f64 {
31712 simd_reduce_min(simd_select_bitmask(
        k,
        a.as_f64x8(),
        _mm512_undefined_pd().as_f64x8(),
31716 ))
31717}
31718
31719/// Reduce the packed 32-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
31720///
31721/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_and_epi32&expand=4564)
31722#[inline]
31723#[target_feature(enable = "avx512f")]
31724#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31725pub unsafe fn _mm512_reduce_and_epi32(a: __m512i) -> i32 {
31726 simd_reduce_and(a.as_i32x16())
31727}
31728
31729/// Reduce the packed 32-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
31730///
31731/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_and_epi32&expand=4563)
31732#[inline]
31733#[target_feature(enable = "avx512f")]
31734#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31735pub unsafe fn _mm512_mask_reduce_and_epi32(k: __mmask16, a: __m512i) -> i32 {
31736 simd_reduce_and(simd_select_bitmask(
        k,
        a.as_i32x16(),
        // all-ones is the identity element of bitwise AND
        _mm512_set1_epi32(-1).as_i32x16(),
31740 ))
31741}
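
// Illustrative sketch, not part of the original module: for a masked bitwise-AND reduction
// the inactive lanes must contribute the all-ones identity so they cannot clear any result
// bits; this is the same substitution performed by the wrapper above.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_masked_and_epi32(k: __mmask16, a: __m512i) -> i32 {
    _mm512_reduce_and_epi32(_mm512_mask_mov_epi32(_mm512_set1_epi32(-1), k, a))
}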
31742
31743/// Reduce the packed 64-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
31744///
31745/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_and_epi64&expand=4566)
31746#[inline]
31747#[target_feature(enable = "avx512f")]
31748#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31749pub unsafe fn _mm512_reduce_and_epi64(a: __m512i) -> i64 {
31750 simd_reduce_and(a.as_i64x8())
31751}
31752
/// Reduce the packed 64-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_and_epi64&expand=4565)
31756#[inline]
31757#[target_feature(enable = "avx512f")]
31758#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31759pub unsafe fn _mm512_mask_reduce_and_epi64(k: __mmask8, a: __m512i) -> i64 {
31760 simd_reduce_and(simd_select_bitmask(
        k,
        a.as_i64x8(),
        // all-ones is the identity element of bitwise AND
        _mm512_set1_epi64(-1).as_i64x8(),
31765 ))
31766}
31767
31768/// Reduce the packed 32-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
31769///
31770/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_or_epi32&expand=4608)
31771#[inline]
31772#[target_feature(enable = "avx512f")]
31773#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31774pub unsafe fn _mm512_reduce_or_epi32(a: __m512i) -> i32 {
31775 simd_reduce_or(a.as_i32x16())
31776}
31777
31778/// Reduce the packed 32-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
31779///
31780/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_or_epi32&expand=4607)
31781#[inline]
31782#[target_feature(enable = "avx512f")]
31783#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31784pub unsafe fn _mm512_mask_reduce_or_epi32(k: __mmask16, a: __m512i) -> i32 {
31785 simd_reduce_or(simd_select_bitmask(
        k,
        a.as_i32x16(),
        _mm512_setzero_si512().as_i32x16(),
31789 ))
31790}
31791
31792/// Reduce the packed 64-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
31793///
31794/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_or_epi64&expand=4610)
31795#[inline]
31796#[target_feature(enable = "avx512f")]
31797#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31798pub unsafe fn _mm512_reduce_or_epi64(a: __m512i) -> i64 {
31799 simd_reduce_or(a.as_i64x8())
31800}
31801
31802/// Reduce the packed 64-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
31803///
31804/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_or_epi64&expand=4609)
31805#[inline]
31806#[target_feature(enable = "avx512f")]
31807#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31808pub unsafe fn _mm512_mask_reduce_or_epi64(k: __mmask8, a: __m512i) -> i64 {
31809 simd_reduce_or(simd_select_bitmask(
        k,
        a.as_i64x8(),
        _mm512_setzero_si512().as_i64x8(),
31813 ))
31814}
31815
31816/// Returns vector of type `__m512d` with indeterminate elements.
31817/// Despite being "undefined", this is some valid value and not equivalent to [`mem::MaybeUninit`].
31818/// In practice, this is equivalent to [`mem::zeroed`].
31819///
31820/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_undefined_pd)
31821#[inline]
31822#[target_feature(enable = "avx512f")]
31823#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31824// This intrinsic has no corresponding instruction.
31825pub unsafe fn _mm512_undefined_pd() -> __m512d {
31826 _mm512_set1_pd(0.0)
31827}
31828
31829/// Returns vector of type `__m512` with indeterminate elements.
31830/// Despite being "undefined", this is some valid value and not equivalent to [`mem::MaybeUninit`].
31831/// In practice, this is equivalent to [`mem::zeroed`].
31832///
31833/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_undefined_ps)
31834#[inline]
31835#[target_feature(enable = "avx512f")]
31836#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31837// This intrinsic has no corresponding instruction.
31838pub unsafe fn _mm512_undefined_ps() -> __m512 {
31839 _mm512_set1_ps(0.0)
31840}
31841
/// Returns vector of type `__m512i` with indeterminate elements.
31843/// Despite being "undefined", this is some valid value and not equivalent to [`mem::MaybeUninit`].
31844/// In practice, this is equivalent to [`mem::zeroed`].
31845///
31846/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_undefined_epi32&expand=5995)
31847#[inline]
31848#[target_feature(enable = "avx512f")]
31849#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31850// This intrinsic has no corresponding instruction.
31851pub unsafe fn _mm512_undefined_epi32() -> __m512i {
31852 _mm512_set1_epi32(0)
31853}
31854
/// Returns vector of type `__m512` with indeterminate elements.
31856/// Despite being "undefined", this is some valid value and not equivalent to [`mem::MaybeUninit`].
31857/// In practice, this is equivalent to [`mem::zeroed`].
31858///
31859/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_undefined&expand=5994)
31860#[inline]
31861#[target_feature(enable = "avx512f")]
31862#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31863// This intrinsic has no corresponding instruction.
31864pub unsafe fn _mm512_undefined() -> __m512 {
31865 _mm512_set1_ps(0.0)
31866}
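
// Illustrative sketch, not part of the original module: `_mm512_undefined_*` is typically
// used as a cheap placeholder operand that is guaranteed to be fully overwritten; with an
// all-ones mask every lane comes from `a`, so the indeterminate source is never observed.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_undefined_as_passthrough(a: __m512i) -> __m512i {
    _mm512_mask_mov_epi32(_mm512_undefined_epi32(), 0xFFFF, a)
}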
31867
31868/// Load 512-bits (composed of 16 packed 32-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
31869///
31870/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_epi32&expand=3377)
31871#[inline]
31872#[target_feature(enable = "avx512f")]
31873#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31874#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
31875pub unsafe fn _mm512_loadu_epi32(mem_addr: *const i32) -> __m512i {
    ptr::read_unaligned(mem_addr as *const __m512i)
31877}
31878
31879/// Load 256-bits (composed of 8 packed 32-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
31880///
31881/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_epi32&expand=3374)
31882#[inline]
31883#[target_feature(enable = "avx512f,avx512vl")]
31884#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31885#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
31886pub unsafe fn _mm256_loadu_epi32(mem_addr: *const i32) -> __m256i {
    ptr::read_unaligned(mem_addr as *const __m256i)
31888}
31889
31890/// Load 128-bits (composed of 4 packed 32-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
31891///
31892/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_epi32&expand=3371)
31893#[inline]
31894#[target_feature(enable = "avx512f,avx512vl")]
31895#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31896#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
31897pub unsafe fn _mm_loadu_epi32(mem_addr: *const i32) -> __m128i {
    ptr::read_unaligned(mem_addr as *const __m128i)
31899}
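
// Illustrative sketch, not part of the original module: the unaligned loads accept any
// `*const i32`, so a slice can be loaded directly as long as the caller guarantees at
// least 16 readable elements.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_loadu_from_slice(data: &[i32]) -> __m512i {
    debug_assert!(data.len() >= 16);
    _mm512_loadu_epi32(data.as_ptr())
}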
31900
31901/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
31902///
31903/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_storeu_epi16&expand=1460)
31904#[inline]
31905#[target_feature(enable = "avx512f")]
31906#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31907#[cfg_attr(test, assert_instr(vpmovdw))]
31908pub unsafe fn _mm512_mask_cvtepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
    vpmovdwmem(mem_addr, a.as_i32x16(), k);
31910}
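
// Illustrative sketch (an assumption, not upstream code): only the lanes whose mask bit is
// set are narrowed to 16 bits and written; memory behind inactive lanes is left untouched.
// The helper name and values are hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_mask_cvtepi32_storeu_epi16() {
    let a = _mm512_set1_epi32(0x0001_0002);
    let mut out = [-1i16; 16];
    // Store only the low eight elements.
    _mm512_mask_cvtepi32_storeu_epi16(out.as_mut_ptr() as *mut i8, 0b0000_0000_1111_1111, a);
    assert_eq!(&out[..8], &[0x0002i16; 8]); // truncated to the low 16 bits
    assert_eq!(&out[8..], &[-1i16; 8]); // untouched
}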
31911
31912/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
31913///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_storeu_epi16&expand=1462)
31915#[inline]
31916#[target_feature(enable = "avx512f,avx512vl")]
31917#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31918#[cfg_attr(test, assert_instr(vpmovdw))]
31919pub unsafe fn _mm256_mask_cvtepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovdwmem256(mem_addr, a.as_i32x8(), k);
31921}
31922
31923/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
31924///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_storeu_epi16&expand=1461)
31926#[inline]
31927#[target_feature(enable = "avx512f,avx512vl")]
31928#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31929#[cfg_attr(test, assert_instr(vpmovdw))]
31930pub unsafe fn _mm_mask_cvtepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovdwmem128(mem_addr, a.as_i32x4(), k);
31932}
31933
31934/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
31935///
31936/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi32_storeu_epi16&expand=1833)
31937#[inline]
31938#[target_feature(enable = "avx512f")]
31939#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31940#[cfg_attr(test, assert_instr(vpmovsdw))]
31941pub unsafe fn _mm512_mask_cvtsepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
    vpmovsdwmem(mem_addr, a.as_i32x16(), k);
31943}
31944
31945/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
31946///
31947/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi32_storeu_epi16&expand=1832)
31948#[inline]
31949#[target_feature(enable = "avx512f,avx512vl")]
31950#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31951#[cfg_attr(test, assert_instr(vpmovsdw))]
31952pub unsafe fn _mm256_mask_cvtsepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovsdwmem256(mem_addr, a.as_i32x8(), k);
31954}
31955
31956/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
31957///
31958/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi32_storeu_epi16&expand=1831)
31959#[inline]
31960#[target_feature(enable = "avx512f,avx512vl")]
31961#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31962#[cfg_attr(test, assert_instr(vpmovsdw))]
31963pub unsafe fn _mm_mask_cvtsepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovsdwmem128(mem_addr, a.as_i32x4(), k);
31965}
31966
31967/// Convert packed unsigned 32-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
31968///
31969/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi32_storeu_epi16&expand=2068)
31970#[inline]
31971#[target_feature(enable = "avx512f")]
31972#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31973#[cfg_attr(test, assert_instr(vpmovusdw))]
31974pub unsafe fn _mm512_mask_cvtusepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
    vpmovusdwmem(mem_addr, a.as_i32x16(), k);
31976}
31977
31978/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
31979///
31980/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi32_storeu_epi16&expand=2067)
31981#[inline]
31982#[target_feature(enable = "avx512f,avx512vl")]
31983#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31984#[cfg_attr(test, assert_instr(vpmovusdw))]
31985pub unsafe fn _mm256_mask_cvtusepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovusdwmem256(mem_addr, a.as_i32x8(), k);
31987}
31988
31989/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
31990///
31991/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi32_storeu_epi16&expand=2066)
31992#[inline]
31993#[target_feature(enable = "avx512f,avx512vl")]
31994#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31995#[cfg_attr(test, assert_instr(vpmovusdw))]
31996pub unsafe fn _mm_mask_cvtusepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovusdwmem128(mem_addr, a.as_i32x4(), k);
31998}
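
// Illustrative sketch (an assumption, not upstream code): contrasts the three 32-to-16-bit
// down-converting stores above. `cvtepi32` truncates, `cvtsepi32` saturates to the i16
// range, and `cvtusepi32` saturates to the u16 range. The helper name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_downconvert_32_to_16() {
    let a = _mm512_set1_epi32(70_000); // does not fit in 16 bits
    let (mut trunc, mut sat, mut usat) = ([0i16; 16], [0i16; 16], [0u16; 16]);
    _mm512_mask_cvtepi32_storeu_epi16(trunc.as_mut_ptr() as *mut i8, 0xFFFF, a);
    _mm512_mask_cvtsepi32_storeu_epi16(sat.as_mut_ptr() as *mut i8, 0xFFFF, a);
    _mm512_mask_cvtusepi32_storeu_epi16(usat.as_mut_ptr() as *mut i8, 0xFFFF, a);
    assert_eq!(trunc[0], 4464); // 70_000 & 0xFFFF
    assert_eq!(sat[0], i16::MAX);
    assert_eq!(usat[0], u16::MAX);
}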
31999
32000/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32001///
32002/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_storeu_epi8&expand=1463)
32003#[inline]
32004#[target_feature(enable = "avx512f")]
32005#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32006#[cfg_attr(test, assert_instr(vpmovdb))]
32007pub unsafe fn _mm512_mask_cvtepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
    vpmovdbmem(mem_addr, a.as_i32x16(), k);
32009}
32010
32011/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32012///
32013/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_storeu_epi8&expand=1462)
32014#[inline]
32015#[target_feature(enable = "avx512f,avx512vl")]
32016#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32017#[cfg_attr(test, assert_instr(vpmovdb))]
32018pub unsafe fn _mm256_mask_cvtepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovdbmem256(mem_addr, a.as_i32x8(), k);
32020}
32021
32022/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32023///
32024/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_storeu_epi8&expand=1461)
32025#[inline]
32026#[target_feature(enable = "avx512f,avx512vl")]
32027#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32028#[cfg_attr(test, assert_instr(vpmovdb))]
32029pub unsafe fn _mm_mask_cvtepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovdbmem128(mem_addr, a.as_i32x4(), k);
32031}
32032
32033/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32034///
32035/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi32_storeu_epi8&expand=1836)
32036#[inline]
32037#[target_feature(enable = "avx512f")]
32038#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32039#[cfg_attr(test, assert_instr(vpmovsdb))]
32040pub unsafe fn _mm512_mask_cvtsepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
    vpmovsdbmem(mem_addr, a.as_i32x16(), k);
32042}
32043
32044/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32045///
32046/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi32_storeu_epi8&expand=1835)
32047#[inline]
32048#[target_feature(enable = "avx512f,avx512vl")]
32049#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32050#[cfg_attr(test, assert_instr(vpmovsdb))]
32051pub unsafe fn _mm256_mask_cvtsepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovsdbmem256(mem_addr, a.as_i32x8(), k);
32053}
32054
32055/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32056///
32057/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi32_storeu_epi8&expand=1834)
32058#[inline]
32059#[target_feature(enable = "avx512f,avx512vl")]
32060#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32061#[cfg_attr(test, assert_instr(vpmovsdb))]
32062pub unsafe fn _mm_mask_cvtsepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovsdbmem128(mem_addr, a.as_i32x4(), k);
32064}
32065
32066/// Convert packed unsigned 32-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32067///
32068/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi32_storeu_epi8&expand=2071)
32069#[inline]
32070#[target_feature(enable = "avx512f")]
32071#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32072#[cfg_attr(test, assert_instr(vpmovusdb))]
32073pub unsafe fn _mm512_mask_cvtusepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
    vpmovusdbmem(mem_addr, a.as_i32x16(), k);
32075}
32076
32077/// Convert packed unsigned 32-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32078///
32079/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi32_storeu_epi8&expand=2070)
32080#[inline]
32081#[target_feature(enable = "avx512f,avx512vl")]
32082#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32083#[cfg_attr(test, assert_instr(vpmovusdb))]
32084pub unsafe fn _mm256_mask_cvtusepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovusdbmem256(mem_addr, a.as_i32x8(), k);
32086}
32087
32088/// Convert packed unsigned 32-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32089///
32090/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi32_storeu_epi8&expand=2069)
32091#[inline]
32092#[target_feature(enable = "avx512f,avx512vl")]
32093#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32094#[cfg_attr(test, assert_instr(vpmovusdb))]
32095pub unsafe fn _mm_mask_cvtusepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovusdbmem128(mem_addr, a.as_i32x4(), k);
32097}
32098
32099/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32100///
32101/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_storeu_epi16&expand=1513)
32102#[inline]
32103#[target_feature(enable = "avx512f")]
32104#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32105#[cfg_attr(test, assert_instr(vpmovqw))]
32106pub unsafe fn _mm512_mask_cvtepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
    vpmovqwmem(mem_addr, a.as_i64x8(), k);
32108}
32109
32110/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32111///
32112/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_storeu_epi16&expand=1512)
32113#[inline]
32114#[target_feature(enable = "avx512f,avx512vl")]
32115#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32116#[cfg_attr(test, assert_instr(vpmovqw))]
32117pub unsafe fn _mm256_mask_cvtepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovqwmem256(mem_addr, a.as_i64x4(), k);
32119}
32120
32121/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32122///
32123/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_storeu_epi16&expand=1511)
32124#[inline]
32125#[target_feature(enable = "avx512f,avx512vl")]
32126#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32127#[cfg_attr(test, assert_instr(vpmovqw))]
32128pub unsafe fn _mm_mask_cvtepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovqwmem128(mem_addr, a.as_i64x2(), k);
32130}
32131
32132/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32133///
32134/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_storeu_epi16&expand=1866)
32135#[inline]
32136#[target_feature(enable = "avx512f")]
32137#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32138#[cfg_attr(test, assert_instr(vpmovsqw))]
32139pub unsafe fn _mm512_mask_cvtsepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
    vpmovsqwmem(mem_addr, a.as_i64x8(), k);
32141}
32142
32143/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32144///
32145/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_storeu_epi16&expand=1865)
32146#[inline]
32147#[target_feature(enable = "avx512f,avx512vl")]
32148#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32149#[cfg_attr(test, assert_instr(vpmovsqw))]
32150pub unsafe fn _mm256_mask_cvtsepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovsqwmem256(mem_addr, a.as_i64x4(), k);
32152}
32153
32154/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32155///
32156/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_storeu_epi16&expand=1864)
32157#[inline]
32158#[target_feature(enable = "avx512f,avx512vl")]
32159#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32160#[cfg_attr(test, assert_instr(vpmovsqw))]
32161pub unsafe fn _mm_mask_cvtsepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovsqwmem128(mem_addr, a.as_i64x2(), k);
32163}
32164
32165/// Convert packed unsigned 64-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32166///
32167/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_storeu_epi16&expand=2101)
32168#[inline]
32169#[target_feature(enable = "avx512f")]
32170#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32171#[cfg_attr(test, assert_instr(vpmovusqw))]
32172pub unsafe fn _mm512_mask_cvtusepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
    vpmovusqwmem(mem_addr, a.as_i64x8(), k);
32174}
32175
32176/// Convert packed unsigned 64-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32177///
32178/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_storeu_epi16&expand=2100)
32179#[inline]
32180#[target_feature(enable = "avx512f,avx512vl")]
32181#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32182#[cfg_attr(test, assert_instr(vpmovusqw))]
32183pub unsafe fn _mm256_mask_cvtusepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovusqwmem256(mem_addr, a.as_i64x4(), k);
32185}
32186
32187/// Convert packed unsigned 64-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32188///
32189/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_storeu_epi16&expand=2099)
32190#[inline]
32191#[target_feature(enable = "avx512f,avx512vl")]
32192#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32193#[cfg_attr(test, assert_instr(vpmovusqw))]
32194pub unsafe fn _mm_mask_cvtusepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovusqwmem128(mem_addr, a.as_i64x2(), k);
32196}
32197
32198/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32199///
32200/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_storeu_epi8&expand=1519)
32201#[inline]
32202#[target_feature(enable = "avx512f")]
32203#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32204#[cfg_attr(test, assert_instr(vpmovqb))]
32205pub unsafe fn _mm512_mask_cvtepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
    vpmovqbmem(mem_addr, a.as_i64x8(), k);
32207}
32208
32209/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32210///
32211/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_storeu_epi8&expand=1518)
32212#[inline]
32213#[target_feature(enable = "avx512f,avx512vl")]
32214#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32215#[cfg_attr(test, assert_instr(vpmovqb))]
32216pub unsafe fn _mm256_mask_cvtepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovqbmem256(mem_addr, a.as_i64x4(), k);
32218}
32219
32220/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32221///
32222/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_storeu_epi8&expand=1517)
32223#[inline]
32224#[target_feature(enable = "avx512f,avx512vl")]
32225#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32226#[cfg_attr(test, assert_instr(vpmovqb))]
32227pub unsafe fn _mm_mask_cvtepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovqbmem128(mem_addr, a.as_i64x2(), k);
32229}
32230
32231/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32232///
32233/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_storeu_epi8&expand=1872)
32234#[inline]
32235#[target_feature(enable = "avx512f")]
32236#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32237#[cfg_attr(test, assert_instr(vpmovsqb))]
32238pub unsafe fn _mm512_mask_cvtsepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
    vpmovsqbmem(mem_addr, a.as_i64x8(), k);
32240}
32241
32242/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32243///
32244/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_storeu_epi8&expand=1871)
32245#[inline]
32246#[target_feature(enable = "avx512f,avx512vl")]
32247#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32248#[cfg_attr(test, assert_instr(vpmovsqb))]
32249pub unsafe fn _mm256_mask_cvtsepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovsqbmem256(mem_addr, a.as_i64x4(), k);
32251}
32252
32253/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32254///
32255/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_storeu_epi8&expand=1870)
32256#[inline]
32257#[target_feature(enable = "avx512f,avx512vl")]
32258#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32259#[cfg_attr(test, assert_instr(vpmovsqb))]
32260pub unsafe fn _mm_mask_cvtsepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovsqbmem128(mem_addr, a.as_i64x2(), k);
32262}
32263
32264/// Convert packed unsigned 64-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32265///
32266/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_storeu_epi8&expand=2107)
32267#[inline]
32268#[target_feature(enable = "avx512f")]
32269#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32270#[cfg_attr(test, assert_instr(vpmovusqb))]
32271pub unsafe fn _mm512_mask_cvtusepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
    vpmovusqbmem(mem_addr, a.as_i64x8(), k);
32273}
32274
32275/// Convert packed unsigned 64-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32276///
32277/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_storeu_epi8&expand=2106)
32278#[inline]
32279#[target_feature(enable = "avx512f,avx512vl")]
32280#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32281#[cfg_attr(test, assert_instr(vpmovusqb))]
32282pub unsafe fn _mm256_mask_cvtusepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovusqbmem256(mem_addr, a.as_i64x4(), k);
32284}
32285
32286/// Convert packed unsigned 64-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32287///
32288/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_storeu_epi8&expand=2105)
32289#[inline]
32290#[target_feature(enable = "avx512f,avx512vl")]
32291#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32292#[cfg_attr(test, assert_instr(vpmovusqb))]
32293pub unsafe fn _mm_mask_cvtusepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovusqbmem128(mem_addr, a.as_i64x2(), k);
32295}
32296
/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32298///
32299/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_storeu_epi32&expand=1516)
32300#[inline]
32301#[target_feature(enable = "avx512f")]
32302#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32303#[cfg_attr(test, assert_instr(vpmovqd))]
32304pub unsafe fn _mm512_mask_cvtepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
    vpmovqdmem(mem_addr, a.as_i64x8(), k);
32306}
32307
/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32309///
32310/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_storeu_epi32&expand=1515)
32311#[inline]
32312#[target_feature(enable = "avx512f,avx512vl")]
32313#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32314#[cfg_attr(test, assert_instr(vpmovqd))]
32315pub unsafe fn _mm256_mask_cvtepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovqdmem256(mem_addr, a.as_i64x4(), k);
32317}
32318
/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32320///
32321/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_storeu_epi32&expand=1514)
32322#[inline]
32323#[target_feature(enable = "avx512f,avx512vl")]
32324#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32325#[cfg_attr(test, assert_instr(vpmovqd))]
32326pub unsafe fn _mm_mask_cvtepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovqdmem128(mem_addr, a.as_i64x2(), k);
32328}
32329
32330/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32331///
32332/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_storeu_epi32&expand=1869)
32333#[inline]
32334#[target_feature(enable = "avx512f")]
32335#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32336#[cfg_attr(test, assert_instr(vpmovsqd))]
32337pub unsafe fn _mm512_mask_cvtsepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
    vpmovsqdmem(mem_addr, a.as_i64x8(), k);
32339}
32340
32341/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32342///
32343/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_storeu_epi32&expand=1868)
32344#[inline]
32345#[target_feature(enable = "avx512f,avx512vl")]
32346#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32347#[cfg_attr(test, assert_instr(vpmovsqd))]
32348pub unsafe fn _mm256_mask_cvtsepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovsqdmem256(mem_addr, a.as_i64x4(), k);
32350}
32351
32352/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32353///
32354/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_storeu_epi32&expand=1867)
32355#[inline]
32356#[target_feature(enable = "avx512f,avx512vl")]
32357#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32358#[cfg_attr(test, assert_instr(vpmovsqd))]
32359pub unsafe fn _mm_mask_cvtsepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovsqdmem128(mem_addr, a.as_i64x2(), k);
32361}
32362
32363/// Convert packed unsigned 64-bit integers in a to packed 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32364///
32365/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_storeu_epi32&expand=2104)
32366#[inline]
32367#[target_feature(enable = "avx512f")]
32368#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32369#[cfg_attr(test, assert_instr(vpmovusqd))]
32370pub unsafe fn _mm512_mask_cvtusepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
    vpmovusqdmem(mem_addr, a.as_i64x8(), k);
32372}
32373
32374/// Convert packed unsigned 64-bit integers in a to packed 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32375///
32376/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_storeu_epi32&expand=2103)
32377#[inline]
32378#[target_feature(enable = "avx512f,avx512vl")]
32379#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32380#[cfg_attr(test, assert_instr(vpmovusqd))]
32381pub unsafe fn _mm256_mask_cvtusepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovusqdmem256(mem_addr, a.as_i64x4(), k);
32383}
32384
32385/// Convert packed unsigned 64-bit integers in a to packed 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32386///
32387/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_storeu_epi32&expand=2102)
32388#[inline]
32389#[target_feature(enable = "avx512f,avx512vl")]
32390#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32391#[cfg_attr(test, assert_instr(vpmovusqd))]
32392pub unsafe fn _mm_mask_cvtusepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovusqdmem128(mem_addr, a.as_i64x2(), k);
32394}
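
// Illustrative sketch (an assumption, not upstream code): the 64-bit variants follow the
// same pattern, here narrowing each active quadword to a truncated doubleword in memory.
// The helper name and values are hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_mask_cvtepi64_storeu_epi32() {
    let a = _mm512_set1_epi64(0x1_0000_0005);
    let mut out = [0i32; 8];
    // All eight quadwords are active, so eight truncated doublewords are written.
    _mm512_mask_cvtepi64_storeu_epi32(out.as_mut_ptr() as *mut i8, 0xFF, a);
    assert_eq!(out, [5i32; 8]);
}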
32395
32396/// Store 512-bits (composed of 16 packed 32-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
32397///
32398/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_epi32&expand=5628)
32399#[inline]
32400#[target_feature(enable = "avx512f")]
32401#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32402#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
32403pub unsafe fn _mm512_storeu_epi32(mem_addr: *mut i32, a: __m512i) {
    ptr::write_unaligned(mem_addr as *mut __m512i, a);
32405}
32406
32407/// Store 256-bits (composed of 8 packed 32-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
32408///
32409/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_epi32&expand=5626)
32410#[inline]
32411#[target_feature(enable = "avx512f,avx512vl")]
32412#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32413#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
32414pub unsafe fn _mm256_storeu_epi32(mem_addr: *mut i32, a: __m256i) {
    ptr::write_unaligned(mem_addr as *mut __m256i, a);
32416}
32417
32418/// Store 128-bits (composed of 4 packed 32-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
32419///
32420/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_epi32&expand=5624)
32421#[inline]
32422#[target_feature(enable = "avx512f,avx512vl")]
32423#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32424#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
32425pub unsafe fn _mm_storeu_epi32(mem_addr: *mut i32, a: __m128i) {
    ptr::write_unaligned(mem_addr as *mut __m128i, a);
32427}
32428
32429/// Load 512-bits (composed of 8 packed 64-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
32430///
32431/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_epi64&expand=3386)
32432#[inline]
32433#[target_feature(enable = "avx512f")]
32434#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32435#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
32436pub unsafe fn _mm512_loadu_epi64(mem_addr: *const i64) -> __m512i {
    ptr::read_unaligned(mem_addr as *const __m512i)
32438}
32439
32440/// Load 256-bits (composed of 4 packed 64-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
32441///
32442/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_epi64&expand=3383)
32443#[inline]
32444#[target_feature(enable = "avx512f,avx512vl")]
32445#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32446#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
32447pub unsafe fn _mm256_loadu_epi64(mem_addr: *const i64) -> __m256i {
    ptr::read_unaligned(mem_addr as *const __m256i)
32449}
32450
32451/// Load 128-bits (composed of 2 packed 64-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
32452///
32453/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_epi64&expand=3380)
32454#[inline]
32455#[target_feature(enable = "avx512f,avx512vl")]
32456#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32457#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
32458pub unsafe fn _mm_loadu_epi64(mem_addr: *const i64) -> __m128i {
    ptr::read_unaligned(mem_addr as *const __m128i)
32460}
32461
32462/// Store 512-bits (composed of 8 packed 64-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
32463///
32464/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_epi64&expand=5634)
32465#[inline]
32466#[target_feature(enable = "avx512f")]
32467#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32468#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
32469pub unsafe fn _mm512_storeu_epi64(mem_addr: *mut i64, a: __m512i) {
    ptr::write_unaligned(mem_addr as *mut __m512i, a);
32471}
32472
32473/// Store 256-bits (composed of 4 packed 64-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
32474///
32475/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_epi64&expand=5632)
32476#[inline]
32477#[target_feature(enable = "avx512f,avx512vl")]
32478#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32479#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
32480pub unsafe fn _mm256_storeu_epi64(mem_addr: *mut i64, a: __m256i) {
    ptr::write_unaligned(mem_addr as *mut __m256i, a);
32482}
32483
32484/// Store 128-bits (composed of 2 packed 64-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
32485///
32486/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_epi64&expand=5630)
32487#[inline]
32488#[target_feature(enable = "avx512f,avx512vl")]
32489#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32490#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
32491pub unsafe fn _mm_storeu_epi64(mem_addr: *mut i64, a: __m128i) {
    ptr::write_unaligned(mem_addr as *mut __m128i, a);
32493}
32494
32495/// Load 512-bits of integer data from memory into dst. mem_addr does not need to be aligned on any particular boundary.
32496///
32497/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_si512&expand=3420)
32498#[inline]
32499#[target_feature(enable = "avx512f")]
32500#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32501#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
32502pub unsafe fn _mm512_loadu_si512(mem_addr: *const i32) -> __m512i {
    ptr::read_unaligned(mem_addr as *const __m512i)
32504}
32505
32506/// Store 512-bits of integer data from a into memory. mem_addr does not need to be aligned on any particular boundary.
32507///
32508/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_si512&expand=5657)
32509#[inline]
32510#[target_feature(enable = "avx512f")]
32511#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32512#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
32513pub unsafe fn _mm512_storeu_si512(mem_addr: *mut i32, a: __m512i) {
    ptr::write_unaligned(mem_addr as *mut __m512i, a);
32515}
32516
32517/// Loads 512-bits (composed of 8 packed double-precision (64-bit)
32518/// floating-point elements) from memory into result.
32519/// `mem_addr` does not need to be aligned on any particular boundary.
32520///
32521/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_pd)
32522#[inline]
32523#[target_feature(enable = "avx512f")]
32524#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32525#[cfg_attr(test, assert_instr(vmovups))]
32526pub unsafe fn _mm512_loadu_pd(mem_addr: *const f64) -> __m512d {
    ptr::read_unaligned(mem_addr as *const __m512d)
32528}
32529
32530/// Stores 512-bits (composed of 8 packed double-precision (64-bit)
32531/// floating-point elements) from `a` into memory.
32532/// `mem_addr` does not need to be aligned on any particular boundary.
32533///
32534/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_pd)
32535#[inline]
32536#[target_feature(enable = "avx512f")]
32537#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32538#[cfg_attr(test, assert_instr(vmovups))]
32539pub unsafe fn _mm512_storeu_pd(mem_addr: *mut f64, a: __m512d) {
    ptr::write_unaligned(mem_addr as *mut __m512d, a);
32541}
32542
32543/// Loads 512-bits (composed of 16 packed single-precision (32-bit)
32544/// floating-point elements) from memory into result.
32545/// `mem_addr` does not need to be aligned on any particular boundary.
32546///
32547/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_ps)
32548#[inline]
32549#[target_feature(enable = "avx512f")]
32550#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32551#[cfg_attr(test, assert_instr(vmovups))]
32552pub unsafe fn _mm512_loadu_ps(mem_addr: *const f32) -> __m512 {
    ptr::read_unaligned(mem_addr as *const __m512)
32554}
32555
32556/// Stores 512-bits (composed of 16 packed single-precision (32-bit)
32557/// floating-point elements) from `a` into memory.
32558/// `mem_addr` does not need to be aligned on any particular boundary.
32559///
32560/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_ps)
32561#[inline]
32562#[target_feature(enable = "avx512f")]
32563#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32564#[cfg_attr(test, assert_instr(vmovups))]
32565pub unsafe fn _mm512_storeu_ps(mem_addr: *mut f32, a: __m512) {
    ptr::write_unaligned(mem_addr as *mut __m512, a);
32567}
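
// Illustrative sketch (an assumption, not upstream code): the unaligned `loadu`/`storeu`
// pair tolerates any address, so reading from an odd element offset inside a larger buffer
// is fine. The helper name and data are hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_unaligned_ps_roundtrip() {
    let data = [1.0f32; 20];
    // Starting one element in, the pointer is generally not 64-byte aligned.
    let v = _mm512_loadu_ps(data.as_ptr().add(1));
    let mut out = [0.0f32; 16];
    _mm512_storeu_ps(out.as_mut_ptr(), v);
    assert_eq!(out, [1.0f32; 16]);
}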
32568
32569/// Load 512-bits of integer data from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
32570///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_si512&expand=3345)
32572#[inline]
32573#[target_feature(enable = "avx512f")]
32574#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32575#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa32
32576pub unsafe fn _mm512_load_si512(mem_addr: *const i32) -> __m512i {
    ptr::read(mem_addr as *const __m512i)
32578}
32579
32580/// Store 512-bits of integer data from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
32581///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_si512&expand=5598)
32583#[inline]
32584#[target_feature(enable = "avx512f")]
32585#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32586#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa32
32587pub unsafe fn _mm512_store_si512(mem_addr: *mut i32, a: __m512i) {
    ptr::write(mem_addr as *mut __m512i, a);
32589}
32590
32591/// Load 512-bits (composed of 16 packed 32-bit integers) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
32592///
32593/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_epi32&expand=3304)
32594#[inline]
32595#[target_feature(enable = "avx512f")]
32596#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32597#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa32
32598pub unsafe fn _mm512_load_epi32(mem_addr: *const i32) -> __m512i {
    ptr::read(mem_addr as *const __m512i)
32600}
32601
32602/// Load 256-bits (composed of 8 packed 32-bit integers) from memory into dst. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
32603///
32604/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_load_epi32&expand=3301)
32605#[inline]
32606#[target_feature(enable = "avx512f,avx512vl")]
32607#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32608#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa32
32609pub unsafe fn _mm256_load_epi32(mem_addr: *const i32) -> __m256i {
    ptr::read(mem_addr as *const __m256i)
32611}
32612
32613/// Load 128-bits (composed of 4 packed 32-bit integers) from memory into dst. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
32614///
32615/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_epi32&expand=3298)
32616#[inline]
32617#[target_feature(enable = "avx512f,avx512vl")]
32618#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32619#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa32
32620pub unsafe fn _mm_load_epi32(mem_addr: *const i32) -> __m128i {
    ptr::read(mem_addr as *const __m128i)
32622}
32623
32624/// Store 512-bits (composed of 16 packed 32-bit integers) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
32625///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_epi32&expand=5569)
32627#[inline]
32628#[target_feature(enable = "avx512f")]
32629#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32630#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa32
32631pub unsafe fn _mm512_store_epi32(mem_addr: *mut i32, a: __m512i) {
    ptr::write(mem_addr as *mut __m512i, a);
32633}
32634
32635/// Store 256-bits (composed of 8 packed 32-bit integers) from a into memory. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
32636///
32637/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_store_epi32&expand=5567)
32638#[inline]
32639#[target_feature(enable = "avx512f,avx512vl")]
32640#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32641#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa32
32642pub unsafe fn _mm256_store_epi32(mem_addr: *mut i32, a: __m256i) {
    ptr::write(mem_addr as *mut __m256i, a);
32644}
32645
32646/// Store 128-bits (composed of 4 packed 32-bit integers) from a into memory. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
32647///
32648/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_epi32&expand=5565)
32649#[inline]
32650#[target_feature(enable = "avx512f,avx512vl")]
32651#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32652#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa32
32653pub unsafe fn _mm_store_epi32(mem_addr: *mut i32, a: __m128i) {
    ptr::write(mem_addr as *mut __m128i, a);
32655}
32656
32657/// Load 512-bits (composed of 8 packed 64-bit integers) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
32658///
32659/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_epi64&expand=3313)
32660#[inline]
32661#[target_feature(enable = "avx512f")]
32662#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32663#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa64
32664pub unsafe fn _mm512_load_epi64(mem_addr: *const i64) -> __m512i {
    ptr::read(mem_addr as *const __m512i)
32666}
32667
32668/// Load 256-bits (composed of 4 packed 64-bit integers) from memory into dst. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
32669///
32670/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_load_epi64&expand=3310)
32671#[inline]
32672#[target_feature(enable = "avx512f,avx512vl")]
32673#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32674#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa64
32675pub unsafe fn _mm256_load_epi64(mem_addr: *const i64) -> __m256i {
    ptr::read(mem_addr as *const __m256i)
32677}
32678
32679/// Load 128-bits (composed of 2 packed 64-bit integers) from memory into dst. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
32680///
32681/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_epi64&expand=3307)
32682#[inline]
32683#[target_feature(enable = "avx512f,avx512vl")]
32684#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32685#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa64
32686pub unsafe fn _mm_load_epi64(mem_addr: *const i64) -> __m128i {
    ptr::read(mem_addr as *const __m128i)
32688}
32689
32690/// Store 512-bits (composed of 8 packed 64-bit integers) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
32691///
32692/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_epi64&expand=5575)
32693#[inline]
32694#[target_feature(enable = "avx512f")]
32695#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32696#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa64
32697pub unsafe fn _mm512_store_epi64(mem_addr: *mut i64, a: __m512i) {
    ptr::write(mem_addr as *mut __m512i, a);
32699}
32700
32701/// Store 256-bits (composed of 4 packed 64-bit integers) from a into memory. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
32702///
32703/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_store_epi64&expand=5573)
32704#[inline]
32705#[target_feature(enable = "avx512f,avx512vl")]
32706#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32707#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa64
32708pub unsafe fn _mm256_store_epi64(mem_addr: *mut i64, a: __m256i) {
    ptr::write(mem_addr as *mut __m256i, a);
32710}
32711
32712/// Store 128-bits (composed of 2 packed 64-bit integers) from a into memory. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
32713///
32714/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_epi64&expand=5571)
32715#[inline]
32716#[target_feature(enable = "avx512f,avx512vl")]
32717#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32718#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa64
32719pub unsafe fn _mm_store_epi64(mem_addr: *mut i64, a: __m128i) {
    ptr::write(mem_addr as *mut __m128i, a);
32721}
32722
32723/// Load 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
32724///
32725/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_ps&expand=3336)
32726#[inline]
32727#[target_feature(enable = "avx512f")]
32728#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32729#[cfg_attr(test, assert_instr(vmovaps))]
32730pub unsafe fn _mm512_load_ps(mem_addr: *const f32) -> __m512 {
    ptr::read(mem_addr as *const __m512)
32732}
32733
/// Store 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
32735///
32736/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_ps&expand=5592)
32737#[inline]
32738#[target_feature(enable = "avx512f")]
32739#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32740#[cfg_attr(test, assert_instr(vmovaps))]
32741pub unsafe fn _mm512_store_ps(mem_addr: *mut f32, a: __m512) {
    ptr::write(mem_addr as *mut __m512, a);
32743}
32744
32745/// Load 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
32746///
32747/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_pd&expand=3326)
32748#[inline]
32749#[target_feature(enable = "avx512f")]
32750#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32751#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovapd
32752pub unsafe fn _mm512_load_pd(mem_addr: *const f64) -> __m512d {
    ptr::read(mem_addr as *const __m512d)
32754}
32755
32756/// Store 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
32757///
32758/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_pd&expand=5585)
32759#[inline]
32760#[target_feature(enable = "avx512f")]
32761#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32762#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovapd
32763pub unsafe fn _mm512_store_pd(mem_addr: *mut f64, a: __m512d) {
    ptr::write(mem_addr as *mut __m512d, a);
32765}
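
// Illustrative sketch (an assumption, not upstream code): the aligned variants require a
// 64-byte boundary, which a `#[repr(align(64))]` wrapper can guarantee. The helper name
// and wrapper type are hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_aligned_store_load_pd() {
    #[repr(align(64))]
    struct Aligned([f64; 8]);

    let mut buf = Aligned([0.0; 8]);
    // The wrapper provides the alignment the aligned store demands.
    _mm512_store_pd(buf.0.as_mut_ptr(), _mm512_set1_pd(2.5));
    assert_eq!(buf.0, [2.5; 8]);
    // Loading back through the aligned load is equally valid here.
    let _v = _mm512_load_pd(buf.0.as_ptr());
}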
32766
32767/// Load packed 32-bit integers from memory into dst using writemask k
32768/// (elements are copied from src when the corresponding mask bit is not set).
32769/// mem_addr does not need to be aligned on any particular boundary.
32770///
32771/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_epi32)
32772#[inline]
32773#[target_feature(enable = "avx512f")]
32774#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32775pub unsafe fn _mm512_mask_loadu_epi32(src: __m512i, k: __mmask16, mem_addr: *const i32) -> __m512i {
32776 let mut dst: __m512i = src;
32777 asm!(
32778 vpl!("vmovdqu32 {dst}{{{k}}}"),
32779 p = in(reg) mem_addr,
32780 k = in(kreg) k,
32781 dst = inout(zmm_reg) dst,
32782 options(pure, readonly, nostack)
32783 );
32784 dst
32785}
32786
32787/// Load packed 32-bit integers from memory into dst using zeromask k
32788/// (elements are zeroed out when the corresponding mask bit is not set).
32789/// mem_addr does not need to be aligned on any particular boundary.
32790///
32791/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_epi32)
32792#[inline]
32793#[target_feature(enable = "avx512f")]
32794#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32795pub unsafe fn _mm512_maskz_loadu_epi32(k: __mmask16, mem_addr: *const i32) -> __m512i {
32796 let mut dst: __m512i;
32797 asm!(
32798 vpl!("vmovdqu32 {dst}{{{k}}} {{z}}"),
32799 p = in(reg) mem_addr,
32800 k = in(kreg) k,
32801 dst = out(zmm_reg) dst,
32802 options(pure, readonly, nostack)
32803 );
32804 dst
32805}
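
// Illustrative sketch (an assumption, not upstream code): with a writemask the inactive
// lanes keep the values from `src`, while the zeromask variant forces them to zero. The
// helper name and values are hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_masked_loads_epi32() {
    let data = [7i32; 16];
    let src = _mm512_set1_epi32(-1);
    let k: __mmask16 = 0b0000_0000_1111_1111;

    let merged = _mm512_mask_loadu_epi32(src, k, data.as_ptr());
    let zeroed = _mm512_maskz_loadu_epi32(k, data.as_ptr());

    let (mut m, mut z) = ([0i32; 16], [0i32; 16]);
    _mm512_storeu_epi32(m.as_mut_ptr(), merged);
    _mm512_storeu_epi32(z.as_mut_ptr(), zeroed);
    assert_eq!(&m[..8], &[7i32; 8]);
    assert_eq!(&m[8..], &[-1i32; 8]); // copied from src
    assert_eq!(&z[..8], &[7i32; 8]);
    assert_eq!(&z[8..], &[0i32; 8]); // zeroed out
}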
32806
32807/// Load packed 64-bit integers from memory into dst using writemask k
32808/// (elements are copied from src when the corresponding mask bit is not set).
32809/// mem_addr does not need to be aligned on any particular boundary.
32810///
32811/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_epi64)
32812#[inline]
32813#[target_feature(enable = "avx512f")]
32814#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32815pub unsafe fn _mm512_mask_loadu_epi64(src: __m512i, k: __mmask8, mem_addr: *const i64) -> __m512i {
32816 let mut dst: __m512i = src;
32817 asm!(
32818 vpl!("vmovdqu64 {dst}{{{k}}}"),
32819 p = in(reg) mem_addr,
32820 k = in(kreg) k,
32821 dst = inout(zmm_reg) dst,
32822 options(pure, readonly, nostack)
32823 );
32824 dst
32825}
32826
32827/// Load packed 64-bit integers from memory into dst using zeromask k
32828/// (elements are zeroed out when the corresponding mask bit is not set).
32829/// mem_addr does not need to be aligned on any particular boundary.
32830///
32831/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_epi64)
32832#[inline]
32833#[target_feature(enable = "avx512f")]
32834#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32835pub unsafe fn _mm512_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m512i {
32836 let mut dst: __m512i;
32837 asm!(
32838 vpl!("vmovdqu64 {dst}{{{k}}} {{z}}"),
32839 p = in(reg) mem_addr,
32840 k = in(kreg) k,
32841 dst = out(zmm_reg) dst,
32842 options(pure, readonly, nostack)
32843 );
32844 dst
32845}
32846
32847/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
32848/// (elements are copied from src when the corresponding mask bit is not set).
32849/// mem_addr does not need to be aligned on any particular boundary.
32850///
32851/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_ps)
32852#[inline]
32853#[target_feature(enable = "avx512f")]
32854#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32855pub unsafe fn _mm512_mask_loadu_ps(src: __m512, k: __mmask16, mem_addr: *const f32) -> __m512 {
32856 let mut dst: __m512 = src;
32857 asm!(
32858 vpl!("vmovups {dst}{{{k}}}"),
32859 p = in(reg) mem_addr,
32860 k = in(kreg) k,
32861 dst = inout(zmm_reg) dst,
32862 options(pure, readonly, nostack)
32863 );
32864 dst
32865}
32866
32867/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
32868/// (elements are zeroed out when the corresponding mask bit is not set).
32869/// mem_addr does not need to be aligned on any particular boundary.
32870///
32871/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_ps)
32872#[inline]
32873#[target_feature(enable = "avx512f")]
32874#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32875pub unsafe fn _mm512_maskz_loadu_ps(k: __mmask16, mem_addr: *const f32) -> __m512 {
32876 let mut dst: __m512;
32877 asm!(
32878 vpl!("vmovups {dst}{{{k}}} {{z}}"),
32879 p = in(reg) mem_addr,
32880 k = in(kreg) k,
32881 dst = out(zmm_reg) dst,
32882 options(pure, readonly, nostack)
32883 );
32884 dst
32885}
32886
32887/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
32888/// (elements are copied from src when the corresponding mask bit is not set).
32889/// mem_addr does not need to be aligned on any particular boundary.
32890///
32891/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_pd)
32892#[inline]
32893#[target_feature(enable = "avx512f")]
32894#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32895pub unsafe fn _mm512_mask_loadu_pd(src: __m512d, k: __mmask8, mem_addr: *const f64) -> __m512d {
32896 let mut dst: __m512d = src;
32897 asm!(
32898 vpl!("vmovupd {dst}{{{k}}}"),
32899 p = in(reg) mem_addr,
32900 k = in(kreg) k,
32901 dst = inout(zmm_reg) dst,
32902 options(pure, readonly, nostack)
32903 );
32904 dst
32905}
32906
32907/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
32908/// (elements are zeroed out when the corresponding mask bit is not set).
32909/// mem_addr does not need to be aligned on any particular boundary.
32910///
32911/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_pd)
32912#[inline]
32913#[target_feature(enable = "avx512f")]
32914#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32915pub unsafe fn _mm512_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m512d {
32916 let mut dst: __m512d;
32917 asm!(
32918 vpl!("vmovupd {dst}{{{k}}} {{z}}"),
32919 p = in(reg) mem_addr,
32920 k = in(kreg) k,
32921 dst = out(zmm_reg) dst,
32922 options(pure, readonly, nostack)
32923 );
32924 dst
32925}
32926
32927/// Load packed 32-bit integers from memory into dst using writemask k
32928/// (elements are copied from src when the corresponding mask bit is not set).
32929/// mem_addr does not need to be aligned on any particular boundary.
32930///
32931/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_epi32)
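///
/// A minimal usage sketch (illustrative only, not compiled as a doctest; it assumes
/// `avx512f` and `avx512vl` support has already been detected at runtime):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let data: [i32; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
/// // SAFETY: requires that the current CPU supports AVX-512F and AVX-512VL.
/// let v = unsafe {
///     let src = _mm256_set1_epi32(0);
///     // The 8-bit mask selects lanes 0, 2, 4 and 6; the other lanes keep `src`'s value.
///     _mm256_mask_loadu_epi32(src, 0b0101_0101, data.as_ptr())
/// };
/// ```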
32932#[inline]
32933#[target_feature(enable = "avx512f,avx512vl,avx")]
32934#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32935pub unsafe fn _mm256_mask_loadu_epi32(src: __m256i, k: __mmask8, mem_addr: *const i32) -> __m256i {
32936 let mut dst: __m256i = src;
32937 asm!(
32938 vpl!("vmovdqu32 {dst}{{{k}}}"),
32939 p = in(reg) mem_addr,
32940 k = in(kreg) k,
32941 dst = inout(ymm_reg) dst,
32942 options(pure, readonly, nostack)
32943 );
32944 dst
32945}
32946
32947/// Load packed 32-bit integers from memory into dst using zeromask k
32948/// (elements are zeroed out when the corresponding mask bit is not set).
32949/// mem_addr does not need to be aligned on any particular boundary.
32950///
32951/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_epi32)
32952#[inline]
32953#[target_feature(enable = "avx512f,avx512vl,avx")]
32954#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32955pub unsafe fn _mm256_maskz_loadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m256i {
32956 let mut dst: __m256i;
32957 asm!(
32958 vpl!("vmovdqu32 {dst}{{{k}}} {{z}}"),
32959 p = in(reg) mem_addr,
32960 k = in(kreg) k,
32961 dst = out(ymm_reg) dst,
32962 options(pure, readonly, nostack)
32963 );
32964 dst
32965}
32966
32967/// Load packed 64-bit integers from memory into dst using writemask k
32968/// (elements are copied from src when the corresponding mask bit is not set).
32969/// mem_addr does not need to be aligned on any particular boundary.
32970///
32971/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_epi64)
32972#[inline]
32973#[target_feature(enable = "avx512f,avx512vl,avx")]
32974#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32975pub unsafe fn _mm256_mask_loadu_epi64(src: __m256i, k: __mmask8, mem_addr: *const i64) -> __m256i {
32976 let mut dst: __m256i = src;
32977 asm!(
32978 vpl!("vmovdqu64 {dst}{{{k}}}"),
32979 p = in(reg) mem_addr,
32980 k = in(kreg) k,
32981 dst = inout(ymm_reg) dst,
32982 options(pure, readonly, nostack)
32983 );
32984 dst
32985}
32986
32987/// Load packed 64-bit integers from memory into dst using zeromask k
32988/// (elements are zeroed out when the corresponding mask bit is not set).
32989/// mem_addr does not need to be aligned on any particular boundary.
32990///
32991/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_epi64)
32992#[inline]
32993#[target_feature(enable = "avx512f,avx512vl,avx")]
32994#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32995pub unsafe fn _mm256_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m256i {
32996 let mut dst: __m256i;
32997 asm!(
32998 vpl!("vmovdqu64 {dst}{{{k}}} {{z}}"),
32999 p = in(reg) mem_addr,
33000 k = in(kreg) k,
33001 dst = out(ymm_reg) dst,
33002 options(pure, readonly, nostack)
33003 );
33004 dst
33005}
33006
33007/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
33008/// (elements are copied from src when the corresponding mask bit is not set).
33009/// mem_addr does not need to be aligned on any particular boundary.
33010///
33011/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_ps)
33012#[inline]
33013#[target_feature(enable = "avx512f,avx512vl,avx")]
33014#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33015pub unsafe fn _mm256_mask_loadu_ps(src: __m256, k: __mmask8, mem_addr: *const f32) -> __m256 {
33016 let mut dst: __m256 = src;
33017 asm!(
33018 vpl!("vmovups {dst}{{{k}}}"),
33019 p = in(reg) mem_addr,
33020 k = in(kreg) k,
33021 dst = inout(ymm_reg) dst,
33022 options(pure, readonly, nostack)
33023 );
33024 dst
33025}
33026
33027/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
33028/// (elements are zeroed out when the corresponding mask bit is not set).
33029/// mem_addr does not need to be aligned on any particular boundary.
33030///
33031/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_ps)
33032#[inline]
33033#[target_feature(enable = "avx512f,avx512vl,avx")]
33034#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33035pub unsafe fn _mm256_maskz_loadu_ps(k: __mmask8, mem_addr: *const f32) -> __m256 {
33036 let mut dst: __m256;
33037 asm!(
33038 vpl!("vmovups {dst}{{{k}}} {{z}}"),
33039 p = in(reg) mem_addr,
33040 k = in(kreg) k,
33041 dst = out(ymm_reg) dst,
33042 options(pure, readonly, nostack)
33043 );
33044 dst
33045}
33046
33047/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
33048/// (elements are copied from src when the corresponding mask bit is not set).
33049/// mem_addr does not need to be aligned on any particular boundary.
33050///
33051/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_pd)
33052#[inline]
33053#[target_feature(enable = "avx512f,avx512vl,avx")]
33054#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33055pub unsafe fn _mm256_mask_loadu_pd(src: __m256d, k: __mmask8, mem_addr: *const f64) -> __m256d {
33056 let mut dst: __m256d = src;
33057 asm!(
33058 vpl!("vmovupd {dst}{{{k}}}"),
33059 p = in(reg) mem_addr,
33060 k = in(kreg) k,
33061 dst = inout(ymm_reg) dst,
33062 options(pure, readonly, nostack)
33063 );
33064 dst
33065}
33066
33067/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
33068/// (elements are zeroed out when the corresponding mask bit is not set).
33069/// mem_addr does not need to be aligned on any particular boundary.
33070///
33071/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_pd)
33072#[inline]
33073#[target_feature(enable = "avx512f,avx512vl,avx")]
33074#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33075pub unsafe fn _mm256_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m256d {
33076 let mut dst: __m256d;
33077 asm!(
33078 vpl!("vmovupd {dst}{{{k}}} {{z}}"),
33079 p = in(reg) mem_addr,
33080 k = in(kreg) k,
33081 dst = out(ymm_reg) dst,
33082 options(pure, readonly, nostack)
33083 );
33084 dst
33085}
33086
33087/// Load packed 32-bit integers from memory into dst using writemask k
33088/// (elements are copied from src when the corresponding mask bit is not set).
33089/// mem_addr does not need to be aligned on any particular boundary.
33090///
33091/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_epi32)
33092#[inline]
33093#[target_feature(enable = "avx512f,avx512vl,avx,sse")]
33094#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33095pub unsafe fn _mm_mask_loadu_epi32(src: __m128i, k: __mmask8, mem_addr: *const i32) -> __m128i {
33096 let mut dst: __m128i = src;
33097 asm!(
33098 vpl!("vmovdqu32 {dst}{{{k}}}"),
33099 p = in(reg) mem_addr,
33100 k = in(kreg) k,
33101 dst = inout(xmm_reg) dst,
33102 options(pure, readonly, nostack)
33103 );
33104 dst
33105}
33106
33107/// Load packed 32-bit integers from memory into dst using zeromask k
33108/// (elements are zeroed out when the corresponding mask bit is not set).
33109/// mem_addr does not need to be aligned on any particular boundary.
33110///
33111/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_epi32)
33112#[inline]
33113#[target_feature(enable = "avx512f,avx512vl,avx,sse")]
33114#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33115pub unsafe fn _mm_maskz_loadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m128i {
33116 let mut dst: __m128i;
33117 asm!(
33118 vpl!("vmovdqu32 {dst}{{{k}}} {{z}}"),
33119 p = in(reg) mem_addr,
33120 k = in(kreg) k,
33121 dst = out(xmm_reg) dst,
33122 options(pure, readonly, nostack)
33123 );
33124 dst
33125}
33126
33127/// Load packed 64-bit integers from memory into dst using writemask k
33128/// (elements are copied from src when the corresponding mask bit is not set).
33129/// mem_addr does not need to be aligned on any particular boundary.
33130///
33131/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_epi64)
33132#[inline]
33133#[target_feature(enable = "avx512f,avx512vl,avx,sse")]
33134#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33135pub unsafe fn _mm_mask_loadu_epi64(src: __m128i, k: __mmask8, mem_addr: *const i64) -> __m128i {
33136 let mut dst: __m128i = src;
33137 asm!(
33138 vpl!("vmovdqu64 {dst}{{{k}}}"),
33139 p = in(reg) mem_addr,
33140 k = in(kreg) k,
33141 dst = inout(xmm_reg) dst,
33142 options(pure, readonly, nostack)
33143 );
33144 dst
33145}
33146
33147/// Load packed 64-bit integers from memory into dst using zeromask k
33148/// (elements are zeroed out when the corresponding mask bit is not set).
33149/// mem_addr does not need to be aligned on any particular boundary.
33150///
33151/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_epi64)
33152#[inline]
33153#[target_feature(enable = "avx512f,avx512vl,avx,sse")]
33154#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33155pub unsafe fn _mm_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m128i {
33156 let mut dst: __m128i;
33157 asm!(
33158 vpl!("vmovdqu64 {dst}{{{k}}} {{z}}"),
33159 p = in(reg) mem_addr,
33160 k = in(kreg) k,
33161 dst = out(xmm_reg) dst,
33162 options(pure, readonly, nostack)
33163 );
33164 dst
33165}
33166
33167/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
33168/// (elements are copied from src when the corresponding mask bit is not set).
33169/// mem_addr does not need to be aligned on any particular boundary.
33170///
33171/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_ps)
33172#[inline]
33173#[target_feature(enable = "avx512f,avx512vl,avx,sse")]
33174#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33175pub unsafe fn _mm_mask_loadu_ps(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
33176 let mut dst: __m128 = src;
33177 asm!(
33178 vpl!("vmovups {dst}{{{k}}}"),
33179 p = in(reg) mem_addr,
33180 k = in(kreg) k,
33181 dst = inout(xmm_reg) dst,
33182 options(pure, readonly, nostack)
33183 );
33184 dst
33185}
33186
33187/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
33188/// (elements are zeroed out when the corresponding mask bit is not set).
33189/// mem_addr does not need to be aligned on any particular boundary.
33190///
33191/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_ps)
33192#[inline]
33193#[target_feature(enable = "avx512f,avx512vl,avx,sse")]
33194#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33195pub unsafe fn _mm_maskz_loadu_ps(k: __mmask8, mem_addr: *const f32) -> __m128 {
33196 let mut dst: __m128;
33197 asm!(
33198 vpl!("vmovups {dst}{{{k}}} {{z}}"),
33199 p = in(reg) mem_addr,
33200 k = in(kreg) k,
33201 dst = out(xmm_reg) dst,
33202 options(pure, readonly, nostack)
33203 );
33204 dst
33205}
33206
33207/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
33208/// (elements are copied from src when the corresponding mask bit is not set).
33209/// mem_addr does not need to be aligned on any particular boundary.
33210///
33211/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_pd)
33212#[inline]
33213#[target_feature(enable = "avx512f,avx512vl,avx,sse")]
33214#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33215pub unsafe fn _mm_mask_loadu_pd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
33216 let mut dst: __m128d = src;
33217 asm!(
33218 vpl!("vmovupd {dst}{{{k}}}"),
33219 p = in(reg) mem_addr,
33220 k = in(kreg) k,
33221 dst = inout(xmm_reg) dst,
33222 options(pure, readonly, nostack)
33223 );
33224 dst
33225}
33226
33227/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
33228/// (elements are zeroed out when the corresponding mask bit is not set).
33229/// mem_addr does not need to be aligned on any particular boundary.
33230///
33231/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_pd)
33232#[inline]
33233#[target_feature(enable = "avx512f,avx512vl,avx,sse")]
33234#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33235pub unsafe fn _mm_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m128d {
33236 let mut dst: __m128d;
33237 asm!(
33238 vpl!("vmovupd {dst}{{{k}}} {{z}}"),
33239 p = in(reg) mem_addr,
33240 k = in(kreg) k,
33241 dst = out(xmm_reg) dst,
33242 options(pure, readonly, nostack)
33243 );
33244 dst
33245}
33246
33247/// Load packed 32-bit integers from memory into dst using writemask k
33248/// (elements are copied from src when the corresponding mask bit is not set).
33249/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
33250///
33251/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_epi32)
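///
/// A minimal usage sketch (illustrative only, not compiled as a doctest; it assumes
/// `avx512f` support has already been detected at runtime). The buffer is over-aligned
/// to satisfy the 64-byte requirement:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[repr(align(64))]
/// struct Aligned([i32; 16]);
///
/// let data = Aligned([3; 16]);
/// // SAFETY: `data` is 64-byte aligned and the CPU supports AVX-512F.
/// let v = unsafe {
///     let src = _mm512_set1_epi32(-1);
///     // The upper 8 lanes are loaded from `data.0`; the lower 8 keep `src`'s value.
///     _mm512_mask_load_epi32(src, 0b1111_1111_0000_0000, data.0.as_ptr())
/// };
/// ```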
33252#[inline]
33253#[target_feature(enable = "avx512f")]
33254#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33255pub unsafe fn _mm512_mask_load_epi32(src: __m512i, k: __mmask16, mem_addr: *const i32) -> __m512i {
33256 let mut dst: __m512i = src;
33257 asm!(
33258 vpl!("vmovdqa32 {dst}{{{k}}}"),
33259 p = in(reg) mem_addr,
33260 k = in(kreg) k,
33261 dst = inout(zmm_reg) dst,
33262 options(pure, readonly, nostack)
33263 );
33264 dst
33265}
33266
33267/// Load packed 32-bit integers from memory into dst using zeromask k
33268/// (elements are zeroed out when the corresponding mask bit is not set).
33269/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
33270///
33271/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_epi32)
33272#[inline]
33273#[target_feature(enable = "avx512f")]
33274#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33275pub unsafe fn _mm512_maskz_load_epi32(k: __mmask16, mem_addr: *const i32) -> __m512i {
33276 let mut dst: __m512i;
33277 asm!(
33278 vpl!("vmovdqa32 {dst}{{{k}}} {{z}}"),
33279 p = in(reg) mem_addr,
33280 k = in(kreg) k,
33281 dst = out(zmm_reg) dst,
33282 options(pure, readonly, nostack)
33283 );
33284 dst
33285}
33286
33287/// Load packed 64-bit integers from memory into dst using writemask k
33288/// (elements are copied from src when the corresponding mask bit is not set).
33289/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
33290///
33291/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_epi64)
33292#[inline]
33293#[target_feature(enable = "avx512f")]
33294#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33295pub unsafe fn _mm512_mask_load_epi64(src: __m512i, k: __mmask8, mem_addr: *const i64) -> __m512i {
33296 let mut dst: __m512i = src;
33297 asm!(
33298 vpl!("vmovdqa64 {dst}{{{k}}}"),
33299 p = in(reg) mem_addr,
33300 k = in(kreg) k,
33301 dst = inout(zmm_reg) dst,
33302 options(pure, readonly, nostack)
33303 );
33304 dst
33305}
33306
33307/// Load packed 64-bit integers from memory into dst using zeromask k
33308/// (elements are zeroed out when the corresponding mask bit is not set).
33309/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
33310///
33311/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_epi64)
33312#[inline]
33313#[target_feature(enable = "avx512f")]
33314#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33315pub unsafe fn _mm512_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m512i {
33316 let mut dst: __m512i;
33317 asm!(
33318 vpl!("vmovdqa64 {dst}{{{k}}} {{z}}"),
33319 p = in(reg) mem_addr,
33320 k = in(kreg) k,
33321 dst = out(zmm_reg) dst,
33322 options(pure, readonly, nostack)
33323 );
33324 dst
33325}
33326
33327/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
33328/// (elements are copied from src when the corresponding mask bit is not set).
33329/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
33330///
33331/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_ps)
33332#[inline]
33333#[target_feature(enable = "avx512f")]
33334#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33335pub unsafe fn _mm512_mask_load_ps(src: __m512, k: __mmask16, mem_addr: *const f32) -> __m512 {
33336 let mut dst: __m512 = src;
33337 asm!(
33338 vpl!("vmovaps {dst}{{{k}}}"),
33339 p = in(reg) mem_addr,
33340 k = in(kreg) k,
33341 dst = inout(zmm_reg) dst,
33342 options(pure, readonly, nostack)
33343 );
33344 dst
33345}
33346
33347/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
33348/// (elements are zeroed out when the corresponding mask bit is not set).
33349/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
33350///
33351/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_ps)
33352#[inline]
33353#[target_feature(enable = "avx512f")]
33354#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33355pub unsafe fn _mm512_maskz_load_ps(k: __mmask16, mem_addr: *const f32) -> __m512 {
33356 let mut dst: __m512;
33357 asm!(
33358 vpl!("vmovaps {dst}{{{k}}} {{z}}"),
33359 p = in(reg) mem_addr,
33360 k = in(kreg) k,
33361 dst = out(zmm_reg) dst,
33362 options(pure, readonly, nostack)
33363 );
33364 dst
33365}
33366
33367/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
33368/// (elements are copied from src when the corresponding mask bit is not set).
33369/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
33370///
33371/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_pd)
33372#[inline]
33373#[target_feature(enable = "avx512f")]
33374#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33375pub unsafe fn _mm512_mask_load_pd(src: __m512d, k: __mmask8, mem_addr: *const f64) -> __m512d {
33376 let mut dst: __m512d = src;
33377 asm!(
33378 vpl!("vmovapd {dst}{{{k}}}"),
33379 p = in(reg) mem_addr,
33380 k = in(kreg) k,
33381 dst = inout(zmm_reg) dst,
33382 options(pure, readonly, nostack)
33383 );
33384 dst
33385}
33386
33387/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
33388/// (elements are zeroed out when the corresponding mask bit is not set).
33389/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
33390///
33391/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_pd)
33392#[inline]
33393#[target_feature(enable = "avx512f")]
33394#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33395pub unsafe fn _mm512_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m512d {
33396 let mut dst: __m512d;
33397 asm!(
33398 vpl!("vmovapd {dst}{{{k}}} {{z}}"),
33399 p = in(reg) mem_addr,
33400 k = in(kreg) k,
33401 dst = out(zmm_reg) dst,
33402 options(pure, readonly, nostack)
33403 );
33404 dst
33405}
33406
33407/// Load packed 32-bit integers from memory into dst using writemask k
33408/// (elements are copied from src when the corresponding mask bit is not set).
33409/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
33410///
33411/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_epi32)
33412#[inline]
33413#[target_feature(enable = "avx512f,avx512vl,avx")]
33414#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33415pub unsafe fn _mm256_mask_load_epi32(src: __m256i, k: __mmask8, mem_addr: *const i32) -> __m256i {
33416 let mut dst: __m256i = src;
33417 asm!(
33418 vpl!("vmovdqa32 {dst}{{{k}}}"),
33419 p = in(reg) mem_addr,
33420 k = in(kreg) k,
33421 dst = inout(ymm_reg) dst,
33422 options(pure, readonly, nostack)
33423 );
33424 dst
33425}
33426
33427/// Load packed 32-bit integers from memory into dst using zeromask k
33428/// (elements are zeroed out when the corresponding mask bit is not set).
33429/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
33430///
33431/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_epi32)
33432#[inline]
33433#[target_feature(enable = "avx512f,avx512vl,avx")]
33434#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33435pub unsafe fn _mm256_maskz_load_epi32(k: __mmask8, mem_addr: *const i32) -> __m256i {
33436 let mut dst: __m256i;
33437 asm!(
33438 vpl!("vmovdqa32 {dst}{{{k}}} {{z}}"),
33439 p = in(reg) mem_addr,
33440 k = in(kreg) k,
33441 dst = out(ymm_reg) dst,
33442 options(pure, readonly, nostack)
33443 );
33444 dst
33445}
33446
33447/// Load packed 64-bit integers from memory into dst using writemask k
33448/// (elements are copied from src when the corresponding mask bit is not set).
33449/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
33450///
33451/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_epi64)
33452#[inline]
33453#[target_feature(enable = "avx512f,avx512vl,avx")]
33454#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33455pub unsafe fn _mm256_mask_load_epi64(src: __m256i, k: __mmask8, mem_addr: *const i64) -> __m256i {
33456 let mut dst: __m256i = src;
33457 asm!(
33458 vpl!("vmovdqa64 {dst}{{{k}}}"),
33459 p = in(reg) mem_addr,
33460 k = in(kreg) k,
33461 dst = inout(ymm_reg) dst,
33462 options(pure, readonly, nostack)
33463 );
33464 dst
33465}
33466
33467/// Load packed 64-bit integers from memory into dst using zeromask k
33468/// (elements are zeroed out when the corresponding mask bit is not set).
33469/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
33470///
33471/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_epi64)
33472#[inline]
33473#[target_feature(enable = "avx512f,avx512vl,avx")]
33474#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33475pub unsafe fn _mm256_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m256i {
33476 let mut dst: __m256i;
33477 asm!(
33478 vpl!("vmovdqa64 {dst}{{{k}}} {{z}}"),
33479 p = in(reg) mem_addr,
33480 k = in(kreg) k,
33481 dst = out(ymm_reg) dst,
33482 options(pure, readonly, nostack)
33483 );
33484 dst
33485}
33486
33487/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
33488/// (elements are copied from src when the corresponding mask bit is not set).
33489/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
33490///
33491/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_ps)
33492#[inline]
33493#[target_feature(enable = "avx512f,avx512vl,avx")]
33494#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33495pub unsafe fn _mm256_mask_load_ps(src: __m256, k: __mmask8, mem_addr: *const f32) -> __m256 {
33496 let mut dst: __m256 = src;
33497 asm!(
33498 vpl!("vmovaps {dst}{{{k}}}"),
33499 p = in(reg) mem_addr,
33500 k = in(kreg) k,
33501 dst = inout(ymm_reg) dst,
33502 options(pure, readonly, nostack)
33503 );
33504 dst
33505}
33506
33507/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
33508/// (elements are zeroed out when the corresponding mask bit is not set).
33509/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
33510///
33511/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_ps)
33512#[inline]
33513#[target_feature(enable = "avx512f,avx512vl,avx")]
33514#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33515pub unsafe fn _mm256_maskz_load_ps(k: __mmask8, mem_addr: *const f32) -> __m256 {
33516 let mut dst: __m256;
33517 asm!(
33518 vpl!("vmovaps {dst}{{{k}}} {{z}}"),
33519 p = in(reg) mem_addr,
33520 k = in(kreg) k,
33521 dst = out(ymm_reg) dst,
33522 options(pure, readonly, nostack)
33523 );
33524 dst
33525}
33526
33527/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
33528/// (elements are copied from src when the corresponding mask bit is not set).
33529/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
33530///
33531/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_pd)
33532#[inline]
33533#[target_feature(enable = "avx512f,avx512vl,avx")]
33534#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33535pub unsafe fn _mm256_mask_load_pd(src: __m256d, k: __mmask8, mem_addr: *const f64) -> __m256d {
33536 let mut dst: __m256d = src;
33537 asm!(
33538 vpl!("vmovapd {dst}{{{k}}}"),
33539 p = in(reg) mem_addr,
33540 k = in(kreg) k,
33541 dst = inout(ymm_reg) dst,
33542 options(pure, readonly, nostack)
33543 );
33544 dst
33545}
33546
33547/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
33548/// (elements are zeroed out when the corresponding mask bit is not set).
33549/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
33550///
33551/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_pd)
33552#[inline]
33553#[target_feature(enable = "avx512f,avx512vl,avx")]
33554#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33555pub unsafe fn _mm256_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m256d {
33556 let mut dst: __m256d;
33557 asm!(
33558 vpl!("vmovapd {dst}{{{k}}} {{z}}"),
33559 p = in(reg) mem_addr,
33560 k = in(kreg) k,
33561 dst = out(ymm_reg) dst,
33562 options(pure, readonly, nostack)
33563 );
33564 dst
33565}
33566
33567/// Load packed 32-bit integers from memory into dst using writemask k
33568/// (elements are copied from src when the corresponding mask bit is not set).
33569/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
33570///
33571/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_epi32)
33572#[inline]
33573#[target_feature(enable = "avx512f,avx512vl,avx,sse")]
33574#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33575pub unsafe fn _mm_mask_load_epi32(src: __m128i, k: __mmask8, mem_addr: *const i32) -> __m128i {
33576 let mut dst: __m128i = src;
33577 asm!(
33578 vpl!("vmovdqa32 {dst}{{{k}}}"),
33579 p = in(reg) mem_addr,
33580 k = in(kreg) k,
33581 dst = inout(xmm_reg) dst,
33582 options(pure, readonly, nostack)
33583 );
33584 dst
33585}
33586
33587/// Load packed 32-bit integers from memory into dst using zeromask k
33588/// (elements are zeroed out when the corresponding mask bit is not set).
33589/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
33590///
33591/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_epi32)
33592#[inline]
33593#[target_feature(enable = "avx512f,avx512vl,avx,sse")]
33594#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33595pub unsafe fn _mm_maskz_load_epi32(k: __mmask8, mem_addr: *const i32) -> __m128i {
33596 let mut dst: __m128i;
33597 asm!(
33598 vpl!("vmovdqa32 {dst}{{{k}}} {{z}}"),
33599 p = in(reg) mem_addr,
33600 k = in(kreg) k,
33601 dst = out(xmm_reg) dst,
33602 options(pure, readonly, nostack)
33603 );
33604 dst
33605}
33606
33607/// Load packed 64-bit integers from memory into dst using writemask k
33608/// (elements are copied from src when the corresponding mask bit is not set).
33609/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
33610///
33611/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_epi64)
33612#[inline]
33613#[target_feature(enable = "avx512f,avx512vl,avx,sse")]
33614#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33615pub unsafe fn _mm_mask_load_epi64(src: __m128i, k: __mmask8, mem_addr: *const i64) -> __m128i {
33616 let mut dst: __m128i = src;
33617 asm!(
33618 vpl!("vmovdqa64 {dst}{{{k}}}"),
33619 p = in(reg) mem_addr,
33620 k = in(kreg) k,
33621 dst = inout(xmm_reg) dst,
33622 options(pure, readonly, nostack)
33623 );
33624 dst
33625}
33626
33627/// Load packed 64-bit integers from memory into dst using zeromask k
33628/// (elements are zeroed out when the corresponding mask bit is not set).
33629/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
33630///
33631/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_epi64)
33632#[inline]
33633#[target_feature(enable = "avx512f,avx512vl,avx,sse")]
33634#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33635pub unsafe fn _mm_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m128i {
33636 let mut dst: __m128i;
33637 asm!(
33638 vpl!("vmovdqa64 {dst}{{{k}}} {{z}}"),
33639 p = in(reg) mem_addr,
33640 k = in(kreg) k,
33641 dst = out(xmm_reg) dst,
33642 options(pure, readonly, nostack)
33643 );
33644 dst
33645}
33646
33647/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
33648/// (elements are copied from src when the corresponding mask bit is not set).
33649/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
33650///
33651/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_ps)
33652#[inline]
33653#[target_feature(enable = "avx512f,avx512vl,avx,sse")]
33654#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33655pub unsafe fn _mm_mask_load_ps(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
33656 let mut dst: __m128 = src;
33657 asm!(
33658 vpl!("vmovaps {dst}{{{k}}}"),
33659 p = in(reg) mem_addr,
33660 k = in(kreg) k,
33661 dst = inout(xmm_reg) dst,
33662 options(pure, readonly, nostack)
33663 );
33664 dst
33665}
33666
33667/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
33668/// (elements are zeroed out when the corresponding mask bit is not set).
33669/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
33670///
33671/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_ps)
33672#[inline]
33673#[target_feature(enable = "avx512f,avx512vl,avx,sse")]
33674#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33675pub unsafe fn _mm_maskz_load_ps(k: __mmask8, mem_addr: *const f32) -> __m128 {
33676 let mut dst: __m128;
33677 asm!(
33678 vpl!("vmovaps {dst}{{{k}}} {{z}}"),
33679 p = in(reg) mem_addr,
33680 k = in(kreg) k,
33681 dst = out(xmm_reg) dst,
33682 options(pure, readonly, nostack)
33683 );
33684 dst
33685}
33686
33687/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
33688/// (elements are copied from src when the corresponding mask bit is not set).
33689/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
33690///
33691/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_pd)
33692#[inline]
33693#[target_feature(enable = "avx512f,avx512vl,avx,sse")]
33694#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33695pub unsafe fn _mm_mask_load_pd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
33696 let mut dst: __m128d = src;
33697 asm!(
33698 vpl!("vmovapd {dst}{{{k}}}"),
33699 p = in(reg) mem_addr,
33700 k = in(kreg) k,
33701 dst = inout(xmm_reg) dst,
33702 options(pure, readonly, nostack)
33703 );
33704 dst
33705}
33706
33707/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
33708/// (elements are zeroed out when the corresponding mask bit is not set).
33709/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
33710///
33711/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_pd)
33712#[inline]
33713#[target_feature(enable = "avx512f,avx512vl,avx,sse")]
33714#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33715pub unsafe fn _mm_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m128d {
33716 let mut dst: __m128d;
33717 asm!(
33718 vpl!("vmovapd {dst}{{{k}}} {{z}}"),
33719 p = in(reg) mem_addr,
33720 k = in(kreg) k,
33721 dst = out(xmm_reg) dst,
33722 options(pure, readonly, nostack)
33723 );
33724 dst
33725}
33726
33727/// Store packed 32-bit integers from a into memory using writemask k.
33728/// mem_addr does not need to be aligned on any particular boundary.
33729///
33730/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_epi32)
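///
/// A minimal usage sketch (illustrative only, not compiled as a doctest; it assumes
/// `avx512f` support has already been detected at runtime):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let mut out = [0i32; 16];
/// // SAFETY: requires that the current CPU supports AVX-512F.
/// unsafe {
///     let a = _mm512_set1_epi32(9);
///     // Only the first four elements of `out` are written; the rest stay 0.
///     _mm512_mask_storeu_epi32(out.as_mut_ptr(), 0b0000_0000_0000_1111, a);
/// }
/// ```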
33731#[inline]
33732#[target_feature(enable = "avx512f")]
33733#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33734pub unsafe fn _mm512_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask16, a: __m512i) {
33735 asm!(
33736 vps!("vmovdqu32", "{{{mask}}}, {a}"),
33737 p = in(reg) mem_addr,
33738 mask = in(kreg) mask,
33739 a = in(zmm_reg) a,
33740 options(nostack)
33741 );
33742}
33743
33744/// Store packed 64-bit integers from a into memory using writemask k.
33745/// mem_addr does not need to be aligned on any particular boundary.
33746///
33747/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_epi64)
33748#[inline]
33749#[target_feature(enable = "avx512f")]
33750#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33751pub unsafe fn _mm512_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m512i) {
33752 asm!(
33753 vps!("vmovdqu64", "{{{mask}}}, {a}"),
33754 p = in(reg) mem_addr,
33755 mask = in(kreg) mask,
33756 a = in(zmm_reg) a,
33757 options(nostack)
33758 );
33759}
33760
33761/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
33762/// mem_addr does not need to be aligned on any particular boundary.
33763///
33764/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_ps)
33765#[inline]
33766#[target_feature(enable = "avx512f")]
33767#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33768pub unsafe fn _mm512_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask16, a: __m512) {
33769 asm!(
33770 vps!("vmovups", "{{{mask}}}, {a}"),
33771 p = in(reg) mem_addr,
33772 mask = in(kreg) mask,
33773 a = in(zmm_reg) a,
33774 options(nostack)
33775 );
33776}
33777
33778/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
33779/// mem_addr does not need to be aligned on any particular boundary.
33780///
33781/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_pd)
33782#[inline]
33783#[target_feature(enable = "avx512f")]
33784#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33785pub unsafe fn _mm512_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m512d) {
33786 asm!(
33787 vps!("vmovupd", "{{{mask}}}, {a}"),
33788 p = in(reg) mem_addr,
33789 mask = in(kreg) mask,
33790 a = in(zmm_reg) a,
33791 options(nostack)
33792 );
33793}
33794
33795/// Store packed 32-bit integers from a into memory using writemask k.
33796/// mem_addr does not need to be aligned on any particular boundary.
33797///
33798/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_epi32)
33799#[inline]
33800#[target_feature(enable = "avx512f,avx512vl,avx")]
33801#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33802pub unsafe fn _mm256_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m256i) {
33803 asm!(
33804 vps!("vmovdqu32", "{{{mask}}}, {a}"),
33805 p = in(reg) mem_addr,
33806 mask = in(kreg) mask,
33807 a = in(ymm_reg) a,
33808 options(nostack)
33809 );
33810}
33811
33812/// Store packed 64-bit integers from a into memory using writemask k.
33813/// mem_addr does not need to be aligned on any particular boundary.
33814///
33815/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_epi64)
33816#[inline]
33817#[target_feature(enable = "avx512f,avx512vl,avx")]
33818#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33819pub unsafe fn _mm256_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m256i) {
33820 asm!(
33821 vps!("vmovdqu64", "{{{mask}}}, {a}"),
33822 p = in(reg) mem_addr,
33823 mask = in(kreg) mask,
33824 a = in(ymm_reg) a,
33825 options(nostack)
33826 );
33827}
33828
33829/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
33830/// mem_addr does not need to be aligned on any particular boundary.
33831///
33832/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_ps)
33833#[inline]
33834#[target_feature(enable = "avx512f,avx512vl,avx")]
33835#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33836pub unsafe fn _mm256_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask8, a: __m256) {
33837 asm!(
33838 vps!("vmovups", "{{{mask}}}, {a}"),
33839 p = in(reg) mem_addr,
33840 mask = in(kreg) mask,
33841 a = in(ymm_reg) a,
33842 options(nostack)
33843 );
33844}
33845
33846/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
33847/// mem_addr does not need to be aligned on any particular boundary.
33848///
33849/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_pd)
33850#[inline]
33851#[target_feature(enable = "avx512f,avx512vl,avx")]
33852#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33853pub unsafe fn _mm256_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m256d) {
33854 asm!(
33855 vps!("vmovupd", "{{{mask}}}, {a}"),
33856 p = in(reg) mem_addr,
33857 mask = in(kreg) mask,
33858 a = in(ymm_reg) a,
33859 options(nostack)
33860 );
33861}
33862
33863/// Store packed 32-bit integers from a into memory using writemask k.
33864/// mem_addr does not need to be aligned on any particular boundary.
33865///
33866/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_epi32)
33867#[inline]
33868#[target_feature(enable = "avx512f,avx512vl,avx,sse")]
33869#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33870pub unsafe fn _mm_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m128i) {
33871 asm!(
33872 vps!("vmovdqu32", "{{{mask}}}, {a}"),
33873 p = in(reg) mem_addr,
33874 mask = in(kreg) mask,
33875 a = in(xmm_reg) a,
33876 options(nostack)
33877 );
33878}
33879
33880/// Store packed 64-bit integers from a into memory using writemask k.
33881/// mem_addr does not need to be aligned on any particular boundary.
33882///
33883/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_epi64)
33884#[inline]
33885#[target_feature(enable = "avx512f,avx512vl,avx,sse")]
33886#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33887pub unsafe fn _mm_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m128i) {
33888 asm!(
33889 vps!("vmovdqu64", "{{{mask}}}, {a}"),
33890 p = in(reg) mem_addr,
33891 mask = in(kreg) mask,
33892 a = in(xmm_reg) a,
33893 options(nostack)
33894 );
33895}
33896
33897/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
33898/// mem_addr does not need to be aligned on any particular boundary.
33899///
33900/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_ps)
33901#[inline]
33902#[target_feature(enable = "avx512f,avx512vl,avx,sse")]
33903#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33904pub unsafe fn _mm_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask8, a: __m128) {
33905 asm!(
33906 vps!("vmovups", "{{{mask}}}, {a}"),
33907 p = in(reg) mem_addr,
33908 mask = in(kreg) mask,
33909 a = in(xmm_reg) a,
33910 options(nostack)
33911 );
33912}
33913
33914/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
33915/// mem_addr does not need to be aligned on any particular boundary.
33916///
33917/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_pd)
33918#[inline]
33919#[target_feature(enable = "avx512f,avx512vl,avx,sse")]
33920#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33921pub unsafe fn _mm_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m128d) {
33922 asm!(
33923 vps!("vmovupd", "{{{mask}}}, {a}"),
33924 p = in(reg) mem_addr,
33925 mask = in(kreg) mask,
33926 a = in(xmm_reg) a,
33927 options(nostack)
33928 );
33929}
33930
33931/// Store packed 32-bit integers from a into memory using writemask k.
33932/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
33933///
33934/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_epi32)
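///
/// A minimal usage sketch (illustrative only, not compiled as a doctest; it assumes
/// `avx512f` support has already been detected at runtime). The destination is
/// over-aligned to satisfy the 64-byte requirement:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[repr(align(64))]
/// struct Aligned([i32; 16]);
///
/// let mut out = Aligned([0; 16]);
/// // SAFETY: `out` is 64-byte aligned and the CPU supports AVX-512F.
/// unsafe {
///     let a = _mm512_set1_epi32(1);
///     // Only the even lanes of `out.0` are written; the odd lanes keep their old value.
///     _mm512_mask_store_epi32(out.0.as_mut_ptr(), 0b0101_0101_0101_0101, a);
/// }
/// ```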
33935#[inline]
33936#[target_feature(enable = "avx512f")]
33937#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33938pub unsafe fn _mm512_mask_store_epi32(mem_addr: *mut i32, mask: __mmask16, a: __m512i) {
33939 asm!(
33940 vps!("vmovdqa32", "{{{mask}}}, {a}"),
33941 p = in(reg) mem_addr,
33942 mask = in(kreg) mask,
33943 a = in(zmm_reg) a,
33944 options(nostack)
33945 );
33946}
33947
33948/// Store packed 64-bit integers from a into memory using writemask k.
33949/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
33950///
33951/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_epi64)
33952#[inline]
33953#[target_feature(enable = "avx512f")]
33954#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33955pub unsafe fn _mm512_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m512i) {
33956 asm!(
33957 vps!("vmovdqa64", "{{{mask}}}, {a}"),
33958 p = in(reg) mem_addr,
33959 mask = in(kreg) mask,
33960 a = in(zmm_reg) a,
33961 options(nostack)
33962 );
33963}
33964
33965/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
33966/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
33967///
33968/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_ps)
33969#[inline]
33970#[target_feature(enable = "avx512f")]
33971#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33972pub unsafe fn _mm512_mask_store_ps(mem_addr: *mut f32, mask: __mmask16, a: __m512) {
33973 asm!(
33974 vps!("vmovaps", "{{{mask}}}, {a}"),
33975 p = in(reg) mem_addr,
33976 mask = in(kreg) mask,
33977 a = in(zmm_reg) a,
33978 options(nostack)
33979 );
33980}
33981
33982/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
33983/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
33984///
33985/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_pd)
33986#[inline]
33987#[target_feature(enable = "avx512f")]
33988#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33989pub unsafe fn _mm512_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m512d) {
33990 asm!(
33991 vps!("vmovapd", "{{{mask}}}, {a}"),
33992 p = in(reg) mem_addr,
33993 mask = in(kreg) mask,
33994 a = in(zmm_reg) a,
33995 options(nostack)
33996 );
33997}
33998
33999/// Store packed 32-bit integers from a into memory using writemask k.
34000/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
34001///
34002/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_epi32)
34003#[inline]
34004#[target_feature(enable = "avx512f,avx512vl,avx")]
34005#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34006pub unsafe fn _mm256_mask_store_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m256i) {
34007 asm!(
34008 vps!("vmovdqa32", "{{{mask}}}, {a}"),
34009 p = in(reg) mem_addr,
34010 mask = in(kreg) mask,
34011 a = in(ymm_reg) a,
34012 options(nostack)
34013 );
34014}
34015
34016/// Store packed 64-bit integers from a into memory using writemask k.
34017/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
34018///
34019/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_epi64)
34020#[inline]
34021#[target_feature(enable = "avx512f,avx512vl,avx")]
34022#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34023pub unsafe fn _mm256_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m256i) {
34024 asm!(
34025 vps!("vmovdqa64", "{{{mask}}}, {a}"),
34026 p = in(reg) mem_addr,
34027 mask = in(kreg) mask,
34028 a = in(ymm_reg) a,
34029 options(nostack)
34030 );
34031}
34032
34033/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
34034/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
34035///
34036/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_ps)
34037#[inline]
34038#[target_feature(enable = "avx512f,avx512vl,avx")]
34039#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34040pub unsafe fn _mm256_mask_store_ps(mem_addr: *mut f32, mask: __mmask8, a: __m256) {
34041 asm!(
34042 vps!("vmovaps", "{{{mask}}}, {a}"),
34043 p = in(reg) mem_addr,
34044 mask = in(kreg) mask,
34045 a = in(ymm_reg) a,
34046 options(nostack)
34047 );
34048}
34049
34050/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
34051/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
34052///
34053/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_pd)
34054#[inline]
34055#[target_feature(enable = "avx512f,avx512vl,avx")]
34056#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34057pub unsafe fn _mm256_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m256d) {
34058 asm!(
34059 vps!("vmovapd", "{{{mask}}}, {a}"),
34060 p = in(reg) mem_addr,
34061 mask = in(kreg) mask,
34062 a = in(ymm_reg) a,
34063 options(nostack)
34064 );
34065}
34066
34067/// Store packed 32-bit integers from a into memory using writemask k.
34068/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
34069///
34070/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_epi32)
34071#[inline]
34072#[target_feature(enable = "avx512f,avx512vl,avx,sse")]
34073#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34074pub unsafe fn _mm_mask_store_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m128i) {
34075 asm!(
34076 vps!("vmovdqa32", "{{{mask}}}, {a}"),
34077 p = in(reg) mem_addr,
34078 mask = in(kreg) mask,
34079 a = in(xmm_reg) a,
34080 options(nostack)
34081 );
34082}
34083
34084/// Store packed 64-bit integers from a into memory using writemask k.
34085/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
34086///
34087/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_epi64)
34088#[inline]
34089#[target_feature(enable = "avx512f,avx512vl,avx,sse")]
34090#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34091pub unsafe fn _mm_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m128i) {
34092 asm!(
34093 vps!("vmovdqa64", "{{{mask}}}, {a}"),
34094 p = in(reg) mem_addr,
34095 mask = in(kreg) mask,
34096 a = in(xmm_reg) a,
34097 options(nostack)
34098 );
34099}
34100
34101/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
34102/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
34103///
34104/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_ps)
34105#[inline]
34106#[target_feature(enable = "avx512f,avx512vl,avx,sse")]
34107#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34108pub unsafe fn _mm_mask_store_ps(mem_addr: *mut f32, mask: __mmask8, a: __m128) {
34109 asm!(
34110 vps!("vmovaps", "{{{mask}}}, {a}"),
34111 p = in(reg) mem_addr,
34112 mask = in(kreg) mask,
34113 a = in(xmm_reg) a,
34114 options(nostack)
34115 );
34116}
34117
34118/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
34119/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
34120///
34121/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_pd)
34122#[inline]
34123#[target_feature(enable = "avx512f,avx512vl,avx,sse")]
34124#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34125pub unsafe fn _mm_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m128d) {
34126 asm!(
34127 vps!("vmovapd", "{{{mask}}}, {a}"),
34128 p = in(reg) mem_addr,
34129 mask = in(kreg) mask,
34130 a = in(xmm_reg) a,
34131 options(nostack)
34132 );
34133}
34134
34135/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
34136///
34137/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_epi32)
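///
/// A minimal usage sketch (illustrative only, not compiled as a doctest; it assumes
/// `avx512f` support has already been detected at runtime). With four mask bits set,
/// only four contiguous `i32` values are read from memory, and they fill the four
/// active lanes of the result in order:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let data: [i32; 4] = [10, 20, 30, 40];
/// // SAFETY: requires that the current CPU supports AVX-512F; only 4 elements are read.
/// let v = unsafe {
///     let src = _mm512_set1_epi32(-1);
///     // Lanes 0, 2, 4 and 6 receive 10, 20, 30 and 40; all other lanes keep `src`'s value.
///     _mm512_mask_expandloadu_epi32(src, 0b0000_0000_0101_0101, data.as_ptr())
/// };
/// ```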
34138#[inline]
34139#[target_feature(enable = "avx512f")]
34140#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34141pub unsafe fn _mm512_mask_expandloadu_epi32(
34142 src: __m512i,
34143 k: __mmask16,
34144 mem_addr: *const i32,
34145) -> __m512i {
34146 let mut dst: __m512i = src;
34147 asm!(
34148 vpl!("vpexpandd {dst}{{{k}}}"),
34149 p = in(reg) mem_addr,
34150 k = in(kreg) k,
34151 dst = inout(zmm_reg) dst,
34152 options(pure, readonly, nostack)
34153 );
34154 dst
34155}
34156
34157/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
34158///
34159/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_epi32)
34160#[inline]
34161#[target_feature(enable = "avx512f")]
34162#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34163pub unsafe fn _mm512_maskz_expandloadu_epi32(k: __mmask16, mem_addr: *const i32) -> __m512i {
34164 let mut dst: __m512i;
34165 asm!(
34166 vpl!("vpexpandd {dst}{{{k}}} {{z}}"),
34167 p = in(reg) mem_addr,
34168 k = in(kreg) k,
34169 dst = out(zmm_reg) dst,
34170 options(pure, readonly, nostack)
34171 );
34172 dst
34173}
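
// Illustrative usage sketch (a hypothetical test-only helper, not part of the
// crate's API or test suite): an expand-load reads *contiguous* elements from
// memory and places them, in order, into the destination lanes whose mask bit
// is set; with the zeromask variant every other lane becomes zero.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_mm512_maskz_expandloadu_epi32() {
    let mem: [i32; 4] = [10, 20, 30, 40];
    // Bits 1, 3, 5 and 7 are set, so mem[0..4] land in lanes 1, 3, 5 and 7.
    let r = _mm512_maskz_expandloadu_epi32(0b0000_0000_1010_1010, mem.as_ptr());
    let mut out = [0i32; 16];
    _mm512_storeu_epi32(out.as_mut_ptr(), r);
    assert_eq!(out[..8], [0, 10, 0, 20, 0, 30, 0, 40]);
    assert_eq!(out[8..], [0; 8]);
}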
34174
34175/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
34176///
34177/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_epi32)
34178#[inline]
34179#[target_feature(enable = "avx512f,avx512vl,avx")]
34180#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34181pub unsafe fn _mm256_mask_expandloadu_epi32(
34182 src: __m256i,
34183 k: __mmask8,
34184 mem_addr: *const i32,
34185) -> __m256i {
34186 let mut dst: __m256i = src;
34187 asm!(
34188 vpl!("vpexpandd {dst}{{{k}}}"),
34189 p = in(reg) mem_addr,
34190 k = in(kreg) k,
34191 dst = inout(ymm_reg) dst,
34192 options(pure, readonly, nostack)
34193 );
34194 dst
34195}
34196
34197/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
34198///
34199/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_epi32)
34200#[inline]
34201#[target_feature(enable = "avx512f,avx512vl,avx")]
34202#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34203pub unsafe fn _mm256_maskz_expandloadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m256i {
34204 let mut dst: __m256i;
34205 asm!(
34206 vpl!("vpexpandd {dst}{{{k}}} {{z}}"),
34207 p = in(reg) mem_addr,
34208 k = in(kreg) k,
34209 dst = out(ymm_reg) dst,
34210 options(pure, readonly, nostack)
34211 );
34212 dst
34213}
34214
34215/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
34216///
34217/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_epi32)
34218#[inline]
34219#[target_feature(enable = "avx512f,avx512vl,avx,sse")]
34220#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34221pub unsafe fn _mm_mask_expandloadu_epi32(
34222 src: __m128i,
34223 k: __mmask8,
34224 mem_addr: *const i32,
34225) -> __m128i {
34226 let mut dst: __m128i = src;
34227 asm!(
34228 vpl!("vpexpandd {dst}{{{k}}}"),
34229 p = in(reg) mem_addr,
34230 k = in(kreg) k,
34231 dst = inout(xmm_reg) dst,
34232 options(pure, readonly, nostack)
34233 );
34234 dst
34235}
34236
34237/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
34238///
34239/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_epi32)
34240#[inline]
34241#[target_feature(enable = "avx512f,avx512vl,avx,sse")]
34242#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34243pub unsafe fn _mm_maskz_expandloadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m128i {
34244 let mut dst: __m128i;
34245 asm!(
34246 vpl!("vpexpandd {dst}{{{k}}} {{z}}"),
34247 p = in(reg) mem_addr,
34248 k = in(kreg) k,
34249 dst = out(xmm_reg) dst,
34250 options(pure, readonly, nostack)
34251 );
34252 dst
34253}
34254
34255/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
34256///
34257/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_epi64)
34258#[inline]
34259#[target_feature(enable = "avx512f")]
34260#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34261pub unsafe fn _mm512_mask_expandloadu_epi64(
34262 src: __m512i,
34263 k: __mmask8,
34264 mem_addr: *const i64,
34265) -> __m512i {
34266 let mut dst: __m512i = src;
34267 asm!(
34268 vpl!("vpexpandq {dst}{{{k}}}"),
34269 p = in(reg) mem_addr,
34270 k = in(kreg) k,
34271 dst = inout(zmm_reg) dst,
34272 options(pure, readonly, nostack)
34273 );
34274 dst
34275}
34276
34277/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
34278///
34279/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_epi64)
34280#[inline]
34281#[target_feature(enable = "avx512f")]
34282#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34283pub unsafe fn _mm512_maskz_expandloadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m512i {
34284 let mut dst: __m512i;
34285 asm!(
34286 vpl!("vpexpandq {dst}{{{k}}} {{z}}"),
34287 p = in(reg) mem_addr,
34288 k = in(kreg) k,
34289 dst = out(zmm_reg) dst,
34290 options(pure, readonly, nostack)
34291 );
34292 dst
34293}
34294
34295/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
34296///
34297/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_epi64)
34298#[inline]
34299#[target_feature(enable = "avx512f,avx512vl,avx")]
34300#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34301pub unsafe fn _mm256_mask_expandloadu_epi64(
34302 src: __m256i,
34303 k: __mmask8,
34304 mem_addr: *const i64,
34305) -> __m256i {
34306 let mut dst: __m256i = src;
34307 asm!(
34308 vpl!("vpexpandq {dst}{{{k}}}"),
34309 p = in(reg) mem_addr,
34310 k = in(kreg) k,
34311 dst = inout(ymm_reg) dst,
34312 options(pure, readonly, nostack)
34313 );
34314 dst
34315}
34316
34317/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
34318///
34319/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_epi64)
34320#[inline]
34321#[target_feature(enable = "avx512f,avx512vl,avx")]
34322#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34323pub unsafe fn _mm256_maskz_expandloadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m256i {
34324 let mut dst: __m256i;
34325 asm!(
34326 vpl!("vpexpandq {dst}{{{k}}} {{z}}"),
34327 p = in(reg) mem_addr,
34328 k = in(kreg) k,
34329 dst = out(ymm_reg) dst,
34330 options(pure, readonly, nostack)
34331 );
34332 dst
34333}
34334
34335/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
34336///
34337/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_epi64)
34338#[inline]
34339#[target_feature(enable = "avx512f,avx512vl,avx,sse")]
34340#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34341pub unsafe fn _mm_mask_expandloadu_epi64(
34342 src: __m128i,
34343 k: __mmask8,
34344 mem_addr: *const i64,
34345) -> __m128i {
34346 let mut dst: __m128i = src;
34347 asm!(
34348 vpl!("vpexpandq {dst}{{{k}}}"),
34349 p = in(reg) mem_addr,
34350 k = in(kreg) k,
34351 dst = inout(xmm_reg) dst,
34352 options(pure, readonly, nostack)
34353 );
34354 dst
34355}
34356
34357/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
34358///
34359/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_epi64)
34360#[inline]
34361#[target_feature(enable = "avx512f,avx512vl,avx,sse")]
34362#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34363pub unsafe fn _mm_maskz_expandloadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m128i {
34364 let mut dst: __m128i;
34365 asm!(
34366 vpl!("vpexpandq {dst}{{{k}}} {{z}}"),
34367 p = in(reg) mem_addr,
34368 k = in(kreg) k,
34369 dst = out(xmm_reg) dst,
34370 options(pure, readonly, nostack)
34371 );
34372 dst
34373}
34374
34375/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
34376///
34377/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_ps)
34378#[inline]
34379#[target_feature(enable = "avx512f")]
34380#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34381pub unsafe fn _mm512_mask_expandloadu_ps(
34382 src: __m512,
34383 k: __mmask16,
34384 mem_addr: *const f32,
34385) -> __m512 {
34386 let mut dst: __m512 = src;
34387 asm!(
34388 vpl!("vexpandps {dst}{{{k}}}"),
34389 p = in(reg) mem_addr,
34390 k = in(kreg) k,
34391 dst = inout(zmm_reg) dst,
34392 options(pure, readonly, nostack)
34393 );
34394 dst
34395}
34396
34397/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
34398///
34399/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_ps)
34400#[inline]
34401#[target_feature(enable = "avx512f")]
34402#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34403pub unsafe fn _mm512_maskz_expandloadu_ps(k: __mmask16, mem_addr: *const f32) -> __m512 {
34404 let mut dst: __m512;
34405 asm!(
34406 vpl!("vexpandps {dst}{{{k}}} {{z}}"),
34407 p = in(reg) mem_addr,
34408 k = in(kreg) k,
34409 dst = out(zmm_reg) dst,
34410 options(pure, readonly, nostack)
34411 );
34412 dst
34413}
34414
34415/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
34416///
34417/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_ps)
34418#[inline]
34419#[target_feature(enable = "avx512f,avx512vl,avx")]
34420#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34421pub unsafe fn _mm256_mask_expandloadu_ps(src: __m256, k: __mmask8, mem_addr: *const f32) -> __m256 {
34422 let mut dst: __m256 = src;
34423 asm!(
34424 vpl!("vexpandps {dst}{{{k}}}"),
34425 p = in(reg) mem_addr,
34426 k = in(kreg) k,
34427 dst = inout(ymm_reg) dst,
34428 options(pure, readonly, nostack)
34429 );
34430 dst
34431}
34432
34433/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
34434///
34435/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_ps)
34436#[inline]
34437#[target_feature(enable = "avx512f,avx512vl,avx")]
34438#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34439pub unsafe fn _mm256_maskz_expandloadu_ps(k: __mmask8, mem_addr: *const f32) -> __m256 {
34440 let mut dst: __m256;
34441 asm!(
34442 vpl!("vexpandps {dst}{{{k}}} {{z}}"),
34443 p = in(reg) mem_addr,
34444 k = in(kreg) k,
34445 dst = out(ymm_reg) dst,
34446 options(pure, readonly, nostack)
34447 );
34448 dst
34449}
34450
34451/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
34452///
34453/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_ps)
34454#[inline]
34455#[target_feature(enable = "avx512f,avx512vl,avx,sse")]
34456#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34457pub unsafe fn _mm_mask_expandloadu_ps(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
34458 let mut dst: __m128 = src;
34459 asm!(
34460 vpl!("vexpandps {dst}{{{k}}}"),
34461 p = in(reg) mem_addr,
34462 k = in(kreg) k,
34463 dst = inout(xmm_reg) dst,
34464 options(pure, readonly, nostack)
34465 );
34466 dst
34467}
34468
34469/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
34470///
34471/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_ps)
34472#[inline]
34473#[target_feature(enable = "avx512f,avx512vl,avx,sse")]
34474#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34475pub unsafe fn _mm_maskz_expandloadu_ps(k: __mmask8, mem_addr: *const f32) -> __m128 {
34476 let mut dst: __m128;
34477 asm!(
34478 vpl!("vexpandps {dst}{{{k}}} {{z}}"),
34479 p = in(reg) mem_addr,
34480 k = in(kreg) k,
34481 dst = out(xmm_reg) dst,
34482 options(pure, readonly, nostack)
34483 );
34484 dst
34485}
34486
34487/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
34488///
34489/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_pd)
34490#[inline]
34491#[target_feature(enable = "avx512f")]
34492#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34493pub unsafe fn _mm512_mask_expandloadu_pd(
34494 src: __m512d,
34495 k: __mmask8,
34496 mem_addr: *const f64,
34497) -> __m512d {
34498 let mut dst: __m512d = src;
34499 asm!(
34500 vpl!("vexpandpd {dst}{{{k}}}"),
34501 p = in(reg) mem_addr,
34502 k = in(kreg) k,
34503 dst = inout(zmm_reg) dst,
34504 options(pure, readonly, nostack)
34505 );
34506 dst
34507}
34508
34509/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
34510///
34511/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_pd)
34512#[inline]
34513#[target_feature(enable = "avx512f")]
34514#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34515pub unsafe fn _mm512_maskz_expandloadu_pd(k: __mmask8, mem_addr: *const f64) -> __m512d {
34516 let mut dst: __m512d;
34517 asm!(
34518 vpl!("vexpandpd {dst}{{{k}}} {{z}}"),
34519 p = in(reg) mem_addr,
34520 k = in(kreg) k,
34521 dst = out(zmm_reg) dst,
34522 options(pure, readonly, nostack)
34523 );
34524 dst
34525}
34526
34527/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
34528///
34529/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_pd)
34530#[inline]
34531#[target_feature(enable = "avx512f,avx512vl,avx")]
34532#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34533pub unsafe fn _mm256_mask_expandloadu_pd(
34534 src: __m256d,
34535 k: __mmask8,
34536 mem_addr: *const f64,
34537) -> __m256d {
34538 let mut dst: __m256d = src;
34539 asm!(
34540 vpl!("vexpandpd {dst}{{{k}}}"),
34541 p = in(reg) mem_addr,
34542 k = in(kreg) k,
34543 dst = inout(ymm_reg) dst,
34544 options(pure, readonly, nostack)
34545 );
34546 dst
34547}
34548
34549/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
34550///
34551/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_pd)
34552#[inline]
34553#[target_feature(enable = "avx512f,avx512vl,avx")]
34554#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34555pub unsafe fn _mm256_maskz_expandloadu_pd(k: __mmask8, mem_addr: *const f64) -> __m256d {
34556 let mut dst: __m256d;
34557 asm!(
34558 vpl!("vexpandpd {dst}{{{k}}} {{z}}"),
34559 p = in(reg) mem_addr,
34560 k = in(kreg) k,
34561 dst = out(ymm_reg) dst,
34562 options(pure, readonly, nostack)
34563 );
34564 dst
34565}
34566
34567/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
34568///
34569/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_pd)
34570#[inline]
34571#[target_feature(enable = "avx512f,avx512vl,avx,sse")]
34572#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34573pub unsafe fn _mm_mask_expandloadu_pd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
34574 let mut dst: __m128d = src;
34575 asm!(
34576 vpl!("vexpandpd {dst}{{{k}}}"),
34577 p = in(reg) mem_addr,
34578 k = in(kreg) k,
34579 dst = inout(xmm_reg) dst,
34580 options(pure, readonly, nostack)
34581 );
34582 dst
34583}
34584
34585/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
34586///
34587/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_pd)
34588#[inline]
34589#[target_feature(enable = "avx512f,avx512vl,avx,sse")]
34590#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34591pub unsafe fn _mm_maskz_expandloadu_pd(k: __mmask8, mem_addr: *const f64) -> __m128d {
34592 let mut dst: __m128d;
34593 asm!(
34594 vpl!("vexpandpd {dst}{{{k}}} {{z}}"),
34595 p = in(reg) mem_addr,
34596 k = in(kreg) k,
34597 dst = out(xmm_reg) dst,
34598 options(pure, readonly, nostack)
34599 );
34600 dst
34601}
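
// Illustrative usage sketch (a hypothetical test-only helper, not part of the
// crate's API or test suite): the writemask expand-load variants keep the
// corresponding lane of `src` wherever the mask bit is clear, instead of
// zeroing it.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn example_mm256_mask_expandloadu_ps() {
    let mem: [f32; 2] = [10., 20.];
    let src = _mm256_set1_ps(-1.);
    // Bits 2 and 5 are set: mem[0] goes to lane 2, mem[1] to lane 5, and every
    // other lane keeps the -1.0 from src.
    let r = _mm256_mask_expandloadu_ps(src, 0b0010_0100, mem.as_ptr());
    let mut out = [0.0f32; 8];
    _mm256_storeu_ps(out.as_mut_ptr(), r);
    assert_eq!(out, [-1., -1., 10., -1., -1., 20., -1., -1.]);
}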
34602
34603/// Set packed double-precision (64-bit) floating-point elements in dst with the supplied values in reverse order.
34604///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_pd&expand=5002)
34606#[inline]
34607#[target_feature(enable = "avx512f")]
34608#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34609pub unsafe fn _mm512_setr_pd(
34610 e0: f64,
34611 e1: f64,
34612 e2: f64,
34613 e3: f64,
34614 e4: f64,
34615 e5: f64,
34616 e6: f64,
34617 e7: f64,
34618) -> __m512d {
    let r: f64x8 = f64x8::new(e0, e1, e2, e3, e4, e5, e6, e7);
    transmute(r)
34621}
34622
34623/// Set packed double-precision (64-bit) floating-point elements in dst with the supplied values.
34624///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_pd&expand=4924)
34626#[inline]
34627#[target_feature(enable = "avx512f")]
34628#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34629pub unsafe fn _mm512_set_pd(
34630 e0: f64,
34631 e1: f64,
34632 e2: f64,
34633 e3: f64,
34634 e4: f64,
34635 e5: f64,
34636 e6: f64,
34637 e7: f64,
34638) -> __m512d {
    _mm512_setr_pd(e7, e6, e5, e4, e3, e2, e1, e0)
34640}
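
// Illustrative usage sketch (a hypothetical test-only helper, not part of the
// crate's API or test suite): `_mm512_set_pd` takes its arguments from the
// highest lane down to lane 0, so it is `_mm512_setr_pd` with the argument
// order reversed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_mm512_set_pd_ordering() {
    let a = _mm512_set_pd(7., 6., 5., 4., 3., 2., 1., 0.);
    let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
    let (mut out_a, mut out_b) = ([0.0f64; 8], [0.0f64; 8]);
    _mm512_storeu_pd(out_a.as_mut_ptr(), a);
    _mm512_storeu_pd(out_b.as_mut_ptr(), b);
    // Both hold 0.0 in lane 0 up to 7.0 in lane 7.
    assert_eq!(out_a, [0., 1., 2., 3., 4., 5., 6., 7.]);
    assert_eq!(out_a, out_b);
}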
34641
34642/// Move the lower single-precision (32-bit) floating-point element from b to the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
34643///
34644/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_move_ss&expand=3832)
34645#[inline]
34646#[target_feature(enable = "avx512f")]
34647#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34648#[cfg_attr(test, assert_instr(vmovss))]
34649pub unsafe fn _mm_mask_move_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
34650 let extractsrc: f32 = simd_extract!(src, 0);
34651 let mut mov: f32 = extractsrc;
34652 if (k & 0b00000001) != 0 {
34653 mov = simd_extract!(b, 0);
34654 }
34655 simd_insert!(a, 0, mov)
34656}
34657
34658/// Move the lower single-precision (32-bit) floating-point element from b to the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
34659///
34660/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_move_ss&expand=3833)
34661#[inline]
34662#[target_feature(enable = "avx512f")]
34663#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34664#[cfg_attr(test, assert_instr(vmovss))]
34665pub unsafe fn _mm_maskz_move_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
34666 let mut mov: f32 = 0.;
34667 if (k & 0b00000001) != 0 {
34668 mov = simd_extract!(b, 0);
34669 }
34670 simd_insert!(a, 0, mov)
34671}
34672
34673/// Move the lower double-precision (64-bit) floating-point element from b to the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
34674///
34675/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_move_sd&expand=3829)
34676#[inline]
34677#[target_feature(enable = "avx512f")]
34678#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34679#[cfg_attr(test, assert_instr(vmovsd))]
34680pub unsafe fn _mm_mask_move_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
34681 let extractsrc: f64 = simd_extract!(src, 0);
34682 let mut mov: f64 = extractsrc;
34683 if (k & 0b00000001) != 0 {
34684 mov = simd_extract!(b, 0);
34685 }
34686 simd_insert!(a, 0, mov)
34687}
34688
34689/// Move the lower double-precision (64-bit) floating-point element from b to the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
34690///
34691/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_move_sd&expand=3830)
34692#[inline]
34693#[target_feature(enable = "avx512f")]
34694#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34695#[cfg_attr(test, assert_instr(vmovsd))]
34696pub unsafe fn _mm_maskz_move_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
34697 let mut mov: f64 = 0.;
34698 if (k & 0b00000001) != 0 {
34699 mov = simd_extract!(b, 0);
34700 }
34701 simd_insert!(a, 0, mov)
34702}
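
// Illustrative usage sketch (a hypothetical test-only helper, not part of the
// crate's API or test suite): only lane 0 is controlled by the mask; the upper
// lanes always come from `a`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_mm_mask_move_ss() {
    let src = _mm_set_ss(100.);
    let a = _mm_setr_ps(1., 2., 3., 4.);
    let b = _mm_set_ss(42.);
    // Mask bit 0 set: lane 0 comes from b -> [42.0, 2.0, 3.0, 4.0].
    assert_eq!(_mm_cvtss_f32(_mm_mask_move_ss(src, 0b1, a, b)), 42.);
    // Mask bit 0 clear: lane 0 comes from src -> [100.0, 2.0, 3.0, 4.0].
    assert_eq!(_mm_cvtss_f32(_mm_mask_move_ss(src, 0b0, a, b)), 100.);
    // The zeromask variant zeroes lane 0 instead.
    assert_eq!(_mm_cvtss_f32(_mm_maskz_move_ss(0b0, a, b)), 0.);
}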
34703
34704/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
34705///
34706/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_add_ss&expand=159)
34707#[inline]
34708#[target_feature(enable = "avx512f")]
34709#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34710#[cfg_attr(test, assert_instr(vaddss))]
34711pub unsafe fn _mm_mask_add_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
34712 let extractsrc: f32 = simd_extract!(src, 0);
34713 let mut add: f32 = extractsrc;
34714 if (k & 0b00000001) != 0 {
34715 let extracta: f32 = simd_extract!(a, 0);
34716 let extractb: f32 = simd_extract!(b, 0);
34717 add = extracta + extractb;
34718 }
34719 simd_insert!(a, 0, add)
34720}
34721
34722/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
34723///
34724/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_add_ss&expand=160)
34725#[inline]
34726#[target_feature(enable = "avx512f")]
34727#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34728#[cfg_attr(test, assert_instr(vaddss))]
34729pub unsafe fn _mm_maskz_add_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
34730 let mut add: f32 = 0.;
34731 if (k & 0b00000001) != 0 {
34732 let extracta: f32 = simd_extract!(a, 0);
34733 let extractb: f32 = simd_extract!(b, 0);
34734 add = extracta + extractb;
34735 }
34736 simd_insert!(a, 0, add)
34737}
34738
34739/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
34740///
34741/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_add_sd&expand=155)
34742#[inline]
34743#[target_feature(enable = "avx512f")]
34744#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34745#[cfg_attr(test, assert_instr(vaddsd))]
34746pub unsafe fn _mm_mask_add_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
34747 let extractsrc: f64 = simd_extract!(src, 0);
34748 let mut add: f64 = extractsrc;
34749 if (k & 0b00000001) != 0 {
34750 let extracta: f64 = simd_extract!(a, 0);
34751 let extractb: f64 = simd_extract!(b, 0);
34752 add = extracta + extractb;
34753 }
34754 simd_insert!(a, 0, add)
34755}
34756
34757/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
34758///
34759/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_add_sd&expand=156)
34760#[inline]
34761#[target_feature(enable = "avx512f")]
34762#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34763#[cfg_attr(test, assert_instr(vaddsd))]
34764pub unsafe fn _mm_maskz_add_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
34765 let mut add: f64 = 0.;
34766 if (k & 0b00000001) != 0 {
34767 let extracta: f64 = simd_extract!(a, 0);
34768 let extractb: f64 = simd_extract!(b, 0);
34769 add = extracta + extractb;
34770 }
34771 simd_insert!(a, 0, add)
34772}
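
// Illustrative usage sketch (a hypothetical test-only helper, not part of the
// crate's API or test suite): the sum lands in lane 0 only when mask bit 0 is
// set; otherwise lane 0 comes from `src` (writemask) or is zeroed (zeromask).
// The masked sub/mul/div scalar intrinsics below follow the same pattern with
// a different arithmetic operation.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_mm_mask_add_ss() {
    let src = _mm_set_ss(100.);
    let a = _mm_set_ss(1.5);
    let b = _mm_set_ss(2.5);
    assert_eq!(_mm_cvtss_f32(_mm_mask_add_ss(src, 0b1, a, b)), 4.0);
    assert_eq!(_mm_cvtss_f32(_mm_mask_add_ss(src, 0b0, a, b)), 100.0);
    assert_eq!(_mm_cvtss_f32(_mm_maskz_add_ss(0b0, a, b)), 0.0);
}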
34773
34774/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
34775///
34776/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sub_ss&expand=5750)
34777#[inline]
34778#[target_feature(enable = "avx512f")]
34779#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34780#[cfg_attr(test, assert_instr(vsubss))]
34781pub unsafe fn _mm_mask_sub_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
34782 let extractsrc: f32 = simd_extract!(src, 0);
34783 let mut add: f32 = extractsrc;
34784 if (k & 0b00000001) != 0 {
34785 let extracta: f32 = simd_extract!(a, 0);
34786 let extractb: f32 = simd_extract!(b, 0);
34787 add = extracta - extractb;
34788 }
34789 simd_insert!(a, 0, add)
34790}
34791
34792/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
34793///
34794/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sub_ss&expand=5751)
34795#[inline]
34796#[target_feature(enable = "avx512f")]
34797#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34798#[cfg_attr(test, assert_instr(vsubss))]
34799pub unsafe fn _mm_maskz_sub_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
34800 let mut add: f32 = 0.;
34801 if (k & 0b00000001) != 0 {
34802 let extracta: f32 = simd_extract!(a, 0);
34803 let extractb: f32 = simd_extract!(b, 0);
34804 add = extracta - extractb;
34805 }
34806 simd_insert!(a, 0, add)
34807}
34808
34809/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
34810///
34811/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sub_sd&expand=5746)
34812#[inline]
34813#[target_feature(enable = "avx512f")]
34814#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34815#[cfg_attr(test, assert_instr(vsubsd))]
34816pub unsafe fn _mm_mask_sub_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
34817 let extractsrc: f64 = simd_extract!(src, 0);
34818 let mut add: f64 = extractsrc;
34819 if (k & 0b00000001) != 0 {
34820 let extracta: f64 = simd_extract!(a, 0);
34821 let extractb: f64 = simd_extract!(b, 0);
34822 add = extracta - extractb;
34823 }
34824 simd_insert!(a, 0, add)
34825}
34826
34827/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
34828///
34829/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sub_sd&expand=5747)
34830#[inline]
34831#[target_feature(enable = "avx512f")]
34832#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34833#[cfg_attr(test, assert_instr(vsubsd))]
34834pub unsafe fn _mm_maskz_sub_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
34835 let mut add: f64 = 0.;
34836 if (k & 0b00000001) != 0 {
34837 let extracta: f64 = simd_extract!(a, 0);
34838 let extractb: f64 = simd_extract!(b, 0);
34839 add = extracta - extractb;
34840 }
34841 simd_insert!(a, 0, add)
34842}
34843
34844/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
34845///
34846/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_mul_ss&expand=3950)
34847#[inline]
34848#[target_feature(enable = "avx512f")]
34849#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34850#[cfg_attr(test, assert_instr(vmulss))]
34851pub unsafe fn _mm_mask_mul_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
34852 let extractsrc: f32 = simd_extract!(src, 0);
34853 let mut add: f32 = extractsrc;
34854 if (k & 0b00000001) != 0 {
34855 let extracta: f32 = simd_extract!(a, 0);
34856 let extractb: f32 = simd_extract!(b, 0);
34857 add = extracta * extractb;
34858 }
34859 simd_insert!(a, 0, add)
34860}
34861
34862/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
34863///
34864/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_mul_ss&expand=3951)
34865#[inline]
34866#[target_feature(enable = "avx512f")]
34867#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34868#[cfg_attr(test, assert_instr(vmulss))]
34869pub unsafe fn _mm_maskz_mul_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
34870 let mut add: f32 = 0.;
34871 if (k & 0b00000001) != 0 {
34872 let extracta: f32 = simd_extract!(a, 0);
34873 let extractb: f32 = simd_extract!(b, 0);
34874 add = extracta * extractb;
34875 }
34876 simd_insert!(a, 0, add)
34877}
34878
34879/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
34880///
34881/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_mul_sd&expand=3947)
34882#[inline]
34883#[target_feature(enable = "avx512f")]
34884#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34885#[cfg_attr(test, assert_instr(vmulsd))]
34886pub unsafe fn _mm_mask_mul_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
34887 let extractsrc: f64 = simd_extract!(src, 0);
34888 let mut add: f64 = extractsrc;
34889 if (k & 0b00000001) != 0 {
34890 let extracta: f64 = simd_extract!(a, 0);
34891 let extractb: f64 = simd_extract!(b, 0);
34892 add = extracta * extractb;
34893 }
34894 simd_insert!(a, 0, add)
34895}
34896
34897/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
34898///
34899/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_mul_sd&expand=3948)
34900#[inline]
34901#[target_feature(enable = "avx512f")]
34902#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34903#[cfg_attr(test, assert_instr(vmulsd))]
34904pub unsafe fn _mm_maskz_mul_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
34905 let mut add: f64 = 0.;
34906 if (k & 0b00000001) != 0 {
34907 let extracta: f64 = simd_extract!(a, 0);
34908 let extractb: f64 = simd_extract!(b, 0);
34909 add = extracta * extractb;
34910 }
34911 simd_insert!(a, 0, add)
34912}
34913
34914/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
34915///
34916/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_div_ss&expand=2181)
34917#[inline]
34918#[target_feature(enable = "avx512f")]
34919#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34920#[cfg_attr(test, assert_instr(vdivss))]
34921pub unsafe fn _mm_mask_div_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
34922 let extractsrc: f32 = simd_extract!(src, 0);
34923 let mut add: f32 = extractsrc;
34924 if (k & 0b00000001) != 0 {
34925 let extracta: f32 = simd_extract!(a, 0);
34926 let extractb: f32 = simd_extract!(b, 0);
34927 add = extracta / extractb;
34928 }
34929 simd_insert!(a, 0, add)
34930}
34931
34932/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
34933///
34934/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_div_ss&expand=2182)
34935#[inline]
34936#[target_feature(enable = "avx512f")]
34937#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34938#[cfg_attr(test, assert_instr(vdivss))]
34939pub unsafe fn _mm_maskz_div_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
34940 let mut add: f32 = 0.;
34941 if (k & 0b00000001) != 0 {
34942 let extracta: f32 = simd_extract!(a, 0);
34943 let extractb: f32 = simd_extract!(b, 0);
34944 add = extracta / extractb;
34945 }
34946 simd_insert!(a, 0, add)
34947}
34948
34949/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
34950///
34951/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_div_sd&expand=2178)
34952#[inline]
34953#[target_feature(enable = "avx512f")]
34954#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34955#[cfg_attr(test, assert_instr(vdivsd))]
34956pub unsafe fn _mm_mask_div_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
34957 let extractsrc: f64 = simd_extract!(src, 0);
34958 let mut add: f64 = extractsrc;
34959 if (k & 0b00000001) != 0 {
34960 let extracta: f64 = simd_extract!(a, 0);
34961 let extractb: f64 = simd_extract!(b, 0);
34962 add = extracta / extractb;
34963 }
34964 simd_insert!(a, 0, add)
34965}
34966
34967/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
34968///
34969/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_div_sd&expand=2179)
34970#[inline]
34971#[target_feature(enable = "avx512f")]
34972#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34973#[cfg_attr(test, assert_instr(vdivsd))]
34974pub unsafe fn _mm_maskz_div_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
34975 let mut add: f64 = 0.;
34976 if (k & 0b00000001) != 0 {
34977 let extracta: f64 = simd_extract!(a, 0);
34978 let extractb: f64 = simd_extract!(b, 0);
34979 add = extracta / extractb;
34980 }
34981 simd_insert!(a, 0, add)
34982}
34983
34984/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
34985///
34986/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_max_ss&expand=3672)
34987#[inline]
34988#[target_feature(enable = "avx512f")]
34989#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34990#[cfg_attr(test, assert_instr(vmaxss))]
34991pub unsafe fn _mm_mask_max_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    transmute(vmaxss(
        a.as_f32x4(),
        b.as_f32x4(),
        src.as_f32x4(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
34999}
35000
35001/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
35002///
35003/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_max_ss&expand=3673)
35004#[inline]
35005#[target_feature(enable = "avx512f")]
35006#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35007#[cfg_attr(test, assert_instr(vmaxss))]
35008pub unsafe fn _mm_maskz_max_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    transmute(vmaxss(
        a.as_f32x4(),
        b.as_f32x4(),
        _mm_setzero_ps().as_f32x4(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
35016}
35017
35018/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
35019///
35020/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_max_sd&expand=3669)
35021#[inline]
35022#[target_feature(enable = "avx512f")]
35023#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35024#[cfg_attr(test, assert_instr(vmaxsd))]
35025pub unsafe fn _mm_mask_max_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    transmute(vmaxsd(
        a.as_f64x2(),
        b.as_f64x2(),
        src.as_f64x2(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
35033}
35034
35035/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
35036///
35037/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_max_sd&expand=3670)
35038#[inline]
35039#[target_feature(enable = "avx512f")]
35040#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35041#[cfg_attr(test, assert_instr(vmaxsd))]
35042pub unsafe fn _mm_maskz_max_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    transmute(vmaxsd(
        a.as_f64x2(),
        b.as_f64x2(),
        _mm_setzero_pd().as_f64x2(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
35050}
35051
35052/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
35053///
35054/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_min_ss&expand=3786)
35055#[inline]
35056#[target_feature(enable = "avx512f")]
35057#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35058#[cfg_attr(test, assert_instr(vminss))]
35059pub unsafe fn _mm_mask_min_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    transmute(vminss(
        a.as_f32x4(),
        b.as_f32x4(),
        src.as_f32x4(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
35067}
35068
35069/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
35070///
35071/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_min_ss&expand=3787)
35072#[inline]
35073#[target_feature(enable = "avx512f")]
35074#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35075#[cfg_attr(test, assert_instr(vminss))]
35076pub unsafe fn _mm_maskz_min_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    transmute(vminss(
        a.as_f32x4(),
        b.as_f32x4(),
        _mm_setzero_ps().as_f32x4(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
35084}
35085
35086/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
35087///
35088/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_min_sd&expand=3783)
35089#[inline]
35090#[target_feature(enable = "avx512f")]
35091#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35092#[cfg_attr(test, assert_instr(vminsd))]
35093pub unsafe fn _mm_mask_min_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    transmute(vminsd(
        a.as_f64x2(),
        b.as_f64x2(),
        src.as_f64x2(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
35101}
35102
35103/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
35104///
35105/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_min_sd&expand=3784)
35106#[inline]
35107#[target_feature(enable = "avx512f")]
35108#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35109#[cfg_attr(test, assert_instr(vminsd))]
35110pub unsafe fn _mm_maskz_min_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    transmute(vminsd(
        a.as_f64x2(),
        b.as_f64x2(),
        _mm_setzero_pd().as_f64x2(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
35118}
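
// Illustrative usage sketch (a hypothetical test-only helper, not part of the
// crate's API or test suite): the masked scalar max/min intrinsics pick the
// larger (or smaller) of the two lane-0 values when the mask bit is set, and
// fall back to `src` or zero otherwise.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_mm_mask_max_min_ss() {
    let src = _mm_set_ss(-1.);
    let a = _mm_set_ss(2.);
    let b = _mm_set_ss(3.);
    assert_eq!(_mm_cvtss_f32(_mm_mask_max_ss(src, 0b1, a, b)), 3.);
    assert_eq!(_mm_cvtss_f32(_mm_mask_min_ss(src, 0b1, a, b)), 2.);
    // Mask bit 0 clear: lane 0 is copied from src.
    assert_eq!(_mm_cvtss_f32(_mm_mask_max_ss(src, 0b0, a, b)), -1.);
}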
35119
35120/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
35121///
35122/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sqrt_ss&expand=5387)
35123#[inline]
35124#[target_feature(enable = "avx512f")]
35125#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35126#[cfg_attr(test, assert_instr(vsqrtss))]
35127pub unsafe fn _mm_mask_sqrt_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    transmute(vsqrtss(
        a.as_f32x4(),
        b.as_f32x4(),
        src.as_f32x4(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
35135}
35136
35137/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
35138///
35139/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sqrt_ss&expand=5388)
35140#[inline]
35141#[target_feature(enable = "avx512f")]
35142#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35143#[cfg_attr(test, assert_instr(vsqrtss))]
35144pub unsafe fn _mm_maskz_sqrt_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    transmute(vsqrtss(
        a.as_f32x4(),
        b.as_f32x4(),
        _mm_setzero_ps().as_f32x4(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
35152}
35153
35154/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
35155///
35156/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sqrt_sd&expand=5384)
35157#[inline]
35158#[target_feature(enable = "avx512f")]
35159#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35160#[cfg_attr(test, assert_instr(vsqrtsd))]
35161pub unsafe fn _mm_mask_sqrt_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    transmute(vsqrtsd(
        a.as_f64x2(),
        b.as_f64x2(),
        src.as_f64x2(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
35169}
35170
35171/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
35172///
35173/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sqrt_sd&expand=5385)
35174#[inline]
35175#[target_feature(enable = "avx512f")]
35176#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35177#[cfg_attr(test, assert_instr(vsqrtsd))]
35178pub unsafe fn _mm_maskz_sqrt_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    transmute(vsqrtsd(
        a.as_f64x2(),
        b.as_f64x2(),
        _mm_setzero_pd().as_f64x2(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
35186}
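
// Illustrative usage sketch (a hypothetical test-only helper, not part of the
// crate's API or test suite): the masked scalar square root behaves like the
// other masked scalar operations; sqrt(4.0) = 2.0 is exact, so the result does
// not depend on the rounding mode.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_mm_mask_sqrt_ss() {
    let src = _mm_set_ss(-1.);
    let a = _mm_set_ss(10.);
    let b = _mm_set_ss(4.);
    assert_eq!(_mm_cvtss_f32(_mm_mask_sqrt_ss(src, 0b1, a, b)), 2.);
    assert_eq!(_mm_cvtss_f32(_mm_mask_sqrt_ss(src, 0b0, a, b)), -1.);
}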
35187
35188/// Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
35189///
35190/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_rsqrt14_ss&expand=4825)
35191#[inline]
35192#[target_feature(enable = "avx512f")]
35193#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35194#[cfg_attr(test, assert_instr(vrsqrt14ss))]
35195pub unsafe fn _mm_rsqrt14_ss(a: __m128, b: __m128) -> __m128 {
    transmute(vrsqrt14ss(
        a.as_f32x4(),
        b.as_f32x4(),
        _mm_setzero_ps().as_f32x4(),
        0b1,
    ))
35202}
35203
35204/// Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
35205///
35206/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_rsqrt14_ss&expand=4823)
35207#[inline]
35208#[target_feature(enable = "avx512f")]
35209#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35210#[cfg_attr(test, assert_instr(vrsqrt14ss))]
35211pub unsafe fn _mm_mask_rsqrt14_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    transmute(vrsqrt14ss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k))
35213}
35214
35215/// Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
35216///
35217/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_rsqrt14_ss&expand=4824)
35218#[inline]
35219#[target_feature(enable = "avx512f")]
35220#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35221#[cfg_attr(test, assert_instr(vrsqrt14ss))]
35222pub unsafe fn _mm_maskz_rsqrt14_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    transmute(vrsqrt14ss(
        a.as_f32x4(),
        b.as_f32x4(),
        _mm_setzero_ps().as_f32x4(),
        k,
    ))
35229}
35230
35231/// Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
35232///
35233/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_rsqrt14_sd&expand=4822)
35234#[inline]
35235#[target_feature(enable = "avx512f")]
35236#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35237#[cfg_attr(test, assert_instr(vrsqrt14sd))]
35238pub unsafe fn _mm_rsqrt14_sd(a: __m128d, b: __m128d) -> __m128d {
    transmute(vrsqrt14sd(
        a.as_f64x2(),
        b.as_f64x2(),
        _mm_setzero_pd().as_f64x2(),
        0b1,
    ))
35245}
35246
35247/// Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
35248///
35249/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_rsqrt14_sd&expand=4820)
35250#[inline]
35251#[target_feature(enable = "avx512f")]
35252#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35253#[cfg_attr(test, assert_instr(vrsqrt14sd))]
35254pub unsafe fn _mm_mask_rsqrt14_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    transmute(vrsqrt14sd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k))
35256}
35257
35258/// Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
35259///
35260/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_rsqrt14_sd&expand=4821)
35261#[inline]
35262#[target_feature(enable = "avx512f")]
35263#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35264#[cfg_attr(test, assert_instr(vrsqrt14sd))]
35265pub unsafe fn _mm_maskz_rsqrt14_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    transmute(vrsqrt14sd(
        a.as_f64x2(),
        b.as_f64x2(),
        _mm_setzero_pd().as_f64x2(),
        k,
    ))
35272}
35273
35274/// Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
35275///
35276/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_rcp14_ss&expand=4508)
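///
/// A minimal usage sketch, not taken from Intel's documentation (assumes the
/// unstable `stdarch_x86_avx512` feature and an AVX-512F capable CPU):
///
/// ```ignore
/// let a = _mm_set_ss(1.0); // upper 3 lanes are copied from `a`
/// let b = _mm_set_ss(8.0); // lower lane: 1/8.0 = 0.125
/// let r = _mm_rcp14_ss(a, b);
/// assert!((_mm_cvtss_f32(r) - 0.125).abs() < 1e-3);
/// ```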
35277#[inline]
35278#[target_feature(enable = "avx512f")]
35279#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35280#[cfg_attr(test, assert_instr(vrcp14ss))]
35281pub unsafe fn _mm_rcp14_ss(a: __m128, b: __m128) -> __m128 {
    transmute(vrcp14ss(
        a.as_f32x4(),
        b.as_f32x4(),
        _mm_setzero_ps().as_f32x4(),
        0b1,
    ))
35288}
35289
35290/// Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
35291///
35292/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_rcp14_ss&expand=4506)
35293#[inline]
35294#[target_feature(enable = "avx512f")]
35295#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35296#[cfg_attr(test, assert_instr(vrcp14ss))]
35297pub unsafe fn _mm_mask_rcp14_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    transmute(vrcp14ss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k))
35299}
35300
35301/// Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
35302///
35303/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_rcp14_ss&expand=4507)
35304#[inline]
35305#[target_feature(enable = "avx512f")]
35306#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35307#[cfg_attr(test, assert_instr(vrcp14ss))]
35308pub unsafe fn _mm_maskz_rcp14_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    transmute(vrcp14ss(
        a.as_f32x4(),
        b.as_f32x4(),
        _mm_setzero_ps().as_f32x4(),
        k,
    ))
35315}
35316
35317/// Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
35318///
35319/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_rcp14_sd&expand=4505)
35320#[inline]
35321#[target_feature(enable = "avx512f")]
35322#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35323#[cfg_attr(test, assert_instr(vrcp14sd))]
35324pub unsafe fn _mm_rcp14_sd(a: __m128d, b: __m128d) -> __m128d {
    transmute(vrcp14sd(
        a.as_f64x2(),
        b.as_f64x2(),
        _mm_setzero_pd().as_f64x2(),
        0b1,
    ))
35331}
35332
35333/// Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
35334///
35335/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_rcp14_sd&expand=4503)
35336#[inline]
35337#[target_feature(enable = "avx512f")]
35338#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35339#[cfg_attr(test, assert_instr(vrcp14sd))]
35340pub unsafe fn _mm_mask_rcp14_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    transmute(vrcp14sd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k))
35342}
35343
35344/// Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
35345///
35346/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_rcp14_sd&expand=4504)
35347#[inline]
35348#[target_feature(enable = "avx512f")]
35349#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35350#[cfg_attr(test, assert_instr(vrcp14sd))]
35351pub unsafe fn _mm_maskz_rcp14_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    transmute(vrcp14sd(
        a.as_f64x2(),
        b.as_f64x2(),
        _mm_setzero_pd().as_f64x2(),
        k,
    ))
35358}
35359
35360/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
35361///
35362/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getexp_ss&expand=2862)
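///
/// A minimal usage sketch, not taken from Intel's documentation (assumes the
/// unstable `stdarch_x86_avx512` feature and an AVX-512F capable CPU):
///
/// ```ignore
/// let a = _mm_set_ss(0.0);
/// let b = _mm_set_ss(8.0); // floor(log2(8.0)) = 3.0
/// let r = _mm_getexp_ss(a, b);
/// assert_eq!(_mm_cvtss_f32(r), 3.0);
/// ```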
35363#[inline]
35364#[target_feature(enable = "avx512f")]
35365#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35366#[cfg_attr(test, assert_instr(vgetexpss))]
35367pub unsafe fn _mm_getexp_ss(a: __m128, b: __m128) -> __m128 {
    transmute(vgetexpss(
        a.as_f32x4(),
        b.as_f32x4(),
        _mm_setzero_ps().as_f32x4(),
        0b1,
        _MM_FROUND_NO_EXC,
    ))
35375}
35376
35377/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
35378///
35379/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getexp_ss&expand=2863)
35380#[inline]
35381#[target_feature(enable = "avx512f")]
35382#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35383#[cfg_attr(test, assert_instr(vgetexpss))]
35384pub unsafe fn _mm_mask_getexp_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    transmute(vgetexpss(
        a.as_f32x4(),
        b.as_f32x4(),
        src.as_f32x4(),
        k,
        _MM_FROUND_NO_EXC,
    ))
35392}
35393
35394/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
35395///
35396/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getexp_ss&expand=2864)
35397#[inline]
35398#[target_feature(enable = "avx512f")]
35399#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35400#[cfg_attr(test, assert_instr(vgetexpss))]
35401pub unsafe fn _mm_maskz_getexp_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    transmute(vgetexpss(
        a.as_f32x4(),
        b.as_f32x4(),
        _mm_setzero_ps().as_f32x4(),
        k,
        _MM_FROUND_NO_EXC,
    ))
35409}
35410
35411/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
35412///
35413/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getexp_sd&expand=2859)
35414#[inline]
35415#[target_feature(enable = "avx512f")]
35416#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35417#[cfg_attr(test, assert_instr(vgetexpsd))]
35418pub unsafe fn _mm_getexp_sd(a: __m128d, b: __m128d) -> __m128d {
    transmute(vgetexpsd(
        a.as_f64x2(),
        b.as_f64x2(),
        _mm_setzero_pd().as_f64x2(),
        0b1,
        _MM_FROUND_NO_EXC,
    ))
35426}
35427
35428/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
35429///
35430/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getexp_sd&expand=2860)
35431#[inline]
35432#[target_feature(enable = "avx512f")]
35433#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35434#[cfg_attr(test, assert_instr(vgetexpsd))]
35435pub unsafe fn _mm_mask_getexp_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    transmute(vgetexpsd(
        a.as_f64x2(),
        b.as_f64x2(),
        src.as_f64x2(),
        k,
        _MM_FROUND_NO_EXC,
    ))
35443}
35444
35445/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
35446///
35447/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getexp_sd&expand=2861)
35448#[inline]
35449#[target_feature(enable = "avx512f")]
35450#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35451#[cfg_attr(test, assert_instr(vgetexpsd))]
35452pub unsafe fn _mm_maskz_getexp_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    transmute(vgetexpsd(
        a.as_f64x2(),
        b.as_f64x2(),
        _mm_setzero_pd().as_f64x2(),
        k,
        _MM_FROUND_NO_EXC,
    ))
35460}
35461
35462/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
35463/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
35464/// _MM_MANT_NORM_1_2 // interval [1, 2)\
35465/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
35466/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
35467/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
35468/// The sign is determined by sc which can take the following values:\
35469/// _MM_MANT_SIGN_src // sign = sign(src)\
35470/// _MM_MANT_SIGN_zero // sign = 0\
35471/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
35472/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
35473///
35474/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getmant_ss&expand=2898)
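///
/// A minimal usage sketch, not taken from Intel's documentation (assumes the
/// unstable `stdarch_x86_avx512` feature and an AVX-512F capable CPU; the
/// `_MM_MANT_NORM_1_2`/`_MM_MANT_SIGN_SRC` constants are the interval and
/// sign selectors listed above):
///
/// ```ignore
/// let a = _mm_set_ss(0.0);
/// let b = _mm_set_ss(12.0); // 12.0 = 1.5 * 2^3, so the [1, 2) mantissa is 1.5
/// let r = _mm_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, b);
/// assert_eq!(_mm_cvtss_f32(r), 1.5);
/// ```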
35475#[inline]
35476#[target_feature(enable = "avx512f")]
35477#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35478#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0))]
35479#[rustc_legacy_const_generics(2, 3)]
35480pub unsafe fn _mm_getmant_ss<
35481 const NORM: _MM_MANTISSA_NORM_ENUM,
35482 const SIGN: _MM_MANTISSA_SIGN_ENUM,
35483>(
35484 a: __m128,
35485 b: __m128,
35486) -> __m128 {
35487 static_assert_uimm_bits!(NORM, 4);
35488 static_assert_uimm_bits!(SIGN, 2);
35489 let a: f32x4 = a.as_f32x4();
35490 let b: f32x4 = b.as_f32x4();
35491 let zero: f32x4 = _mm_setzero_ps().as_f32x4();
    let r: f32x4 = vgetmantss(a, b, SIGN << 2 | NORM, zero, 0b1, _MM_FROUND_CUR_DIRECTION);
    transmute(r)
35494}
35495
35496/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
35497/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
35498/// _MM_MANT_NORM_1_2 // interval [1, 2)\
35499/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
35500/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
35501/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
35502/// The sign is determined by sc which can take the following values:\
35503/// _MM_MANT_SIGN_src // sign = sign(src)\
35504/// _MM_MANT_SIGN_zero // sign = 0\
35505/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
35506/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
35507///
35508/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getmant_ss&expand=2899)
35509#[inline]
35510#[target_feature(enable = "avx512f")]
35511#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35512#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0))]
35513#[rustc_legacy_const_generics(4, 5)]
35514pub unsafe fn _mm_mask_getmant_ss<
35515 const NORM: _MM_MANTISSA_NORM_ENUM,
35516 const SIGN: _MM_MANTISSA_SIGN_ENUM,
35517>(
35518 src: __m128,
35519 k: __mmask8,
35520 a: __m128,
35521 b: __m128,
35522) -> __m128 {
35523 static_assert_uimm_bits!(NORM, 4);
35524 static_assert_uimm_bits!(SIGN, 2);
35525 let a: f32x4 = a.as_f32x4();
35526 let b: f32x4 = b.as_f32x4();
35527 let src: f32x4 = src.as_f32x4();
    let r: f32x4 = vgetmantss(a, b, SIGN << 2 | NORM, src, k, _MM_FROUND_CUR_DIRECTION);
    transmute(r)
35530}
35531
35532/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
35533/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
35534/// _MM_MANT_NORM_1_2 // interval [1, 2)\
35535/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
35536/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
35537/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
35538/// The sign is determined by sc which can take the following values:\
35539/// _MM_MANT_SIGN_src // sign = sign(src)\
35540/// _MM_MANT_SIGN_zero // sign = 0\
35541/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
35542/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
35543///
35544/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getmant_ss&expand=2900)
35545#[inline]
35546#[target_feature(enable = "avx512f")]
35547#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35548#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0))]
35549#[rustc_legacy_const_generics(3, 4)]
35550pub unsafe fn _mm_maskz_getmant_ss<
35551 const NORM: _MM_MANTISSA_NORM_ENUM,
35552 const SIGN: _MM_MANTISSA_SIGN_ENUM,
35553>(
35554 k: __mmask8,
35555 a: __m128,
35556 b: __m128,
35557) -> __m128 {
35558 static_assert_uimm_bits!(NORM, 4);
35559 static_assert_uimm_bits!(SIGN, 2);
35560 let a: f32x4 = a.as_f32x4();
35561 let b: f32x4 = b.as_f32x4();
35562 let zero: f32x4 = _mm_setzero_ps().as_f32x4();
    let r: f32x4 = vgetmantss(a, b, SIGN << 2 | NORM, zero, k, _MM_FROUND_CUR_DIRECTION);
    transmute(r)
35565}
35566
35567/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
35568/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
35569/// _MM_MANT_NORM_1_2 // interval [1, 2)\
35570/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
35571/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
35572/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
35573/// The sign is determined by sc which can take the following values:\
35574/// _MM_MANT_SIGN_src // sign = sign(src)\
35575/// _MM_MANT_SIGN_zero // sign = 0\
35576/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
35577/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
35578///
35579/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getmant_sd&expand=2895)
35580#[inline]
35581#[target_feature(enable = "avx512f")]
35582#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35583#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0))]
35584#[rustc_legacy_const_generics(2, 3)]
35585pub unsafe fn _mm_getmant_sd<
35586 const NORM: _MM_MANTISSA_NORM_ENUM,
35587 const SIGN: _MM_MANTISSA_SIGN_ENUM,
35588>(
35589 a: __m128d,
35590 b: __m128d,
35591) -> __m128d {
35592 static_assert_uimm_bits!(NORM, 4);
35593 static_assert_uimm_bits!(SIGN, 2);
35594 let a: f64x2 = a.as_f64x2();
35595 let b: f64x2 = b.as_f64x2();
35596 let zero: f64x2 = _mm_setzero_pd().as_f64x2();
    let r: f64x2 = vgetmantsd(a, b, SIGN << 2 | NORM, zero, 0b1, _MM_FROUND_CUR_DIRECTION);
    transmute(r)
35599}
35600
35601/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
35602/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
35603/// _MM_MANT_NORM_1_2 // interval [1, 2)\
35604/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
35605/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
35606/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
35607/// The sign is determined by sc which can take the following values:\
35608/// _MM_MANT_SIGN_src // sign = sign(src)\
35609/// _MM_MANT_SIGN_zero // sign = 0\
35610/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
35611/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
35612///
35613/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getmant_sd&expand=2896)
35614#[inline]
35615#[target_feature(enable = "avx512f")]
35616#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35617#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0))]
35618#[rustc_legacy_const_generics(4, 5)]
35619pub unsafe fn _mm_mask_getmant_sd<
35620 const NORM: _MM_MANTISSA_NORM_ENUM,
35621 const SIGN: _MM_MANTISSA_SIGN_ENUM,
35622>(
35623 src: __m128d,
35624 k: __mmask8,
35625 a: __m128d,
35626 b: __m128d,
35627) -> __m128d {
35628 static_assert_uimm_bits!(NORM, 4);
35629 static_assert_uimm_bits!(SIGN, 2);
35630 let a: f64x2 = a.as_f64x2();
35631 let b: f64x2 = b.as_f64x2();
35632 let src: f64x2 = src.as_f64x2();
    let r: f64x2 = vgetmantsd(a, b, SIGN << 2 | NORM, src, k, _MM_FROUND_CUR_DIRECTION);
    transmute(r)
35635}
35636
35637/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
35638/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
35639/// _MM_MANT_NORM_1_2 // interval [1, 2)\
35640/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
35641/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
35642/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
35643/// The sign is determined by sc which can take the following values:\
35644/// _MM_MANT_SIGN_src // sign = sign(src)\
35645/// _MM_MANT_SIGN_zero // sign = 0\
35646/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
35647/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
35648///
35649/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getmant_sd&expand=2897)
35650#[inline]
35651#[target_feature(enable = "avx512f")]
35652#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35653#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0))]
35654#[rustc_legacy_const_generics(3, 4)]
35655pub unsafe fn _mm_maskz_getmant_sd<
35656 const NORM: _MM_MANTISSA_NORM_ENUM,
35657 const SIGN: _MM_MANTISSA_SIGN_ENUM,
35658>(
35659 k: __mmask8,
35660 a: __m128d,
35661 b: __m128d,
35662) -> __m128d {
35663 static_assert_uimm_bits!(NORM, 4);
35664 static_assert_uimm_bits!(SIGN, 2);
35665 let a: f64x2 = a.as_f64x2();
35666 let b: f64x2 = b.as_f64x2();
35667 let zero: f64x2 = _mm_setzero_pd().as_f64x2();
    let r: f64x2 = vgetmantsd(a, b, SIGN << 2 | NORM, zero, k, _MM_FROUND_CUR_DIRECTION);
    transmute(r)
35670}
35671
35672/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
35673/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
35674/// _MM_FROUND_TO_NEAREST_INT // round to nearest\
35675/// _MM_FROUND_TO_NEG_INF // round down\
35676/// _MM_FROUND_TO_POS_INF // round up\
35677/// _MM_FROUND_TO_ZERO // truncate\
35678/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
35679///
35680/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_roundscale_ss&expand=4802)
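///
/// A minimal usage sketch, not taken from Intel's documentation (assumes the
/// unstable `stdarch_x86_avx512` feature and an AVX-512F capable CPU; imm8
/// bits \[7:4\] select how many fraction bits to keep, here 0):
///
/// ```ignore
/// let a = _mm_set_ss(0.0);
/// let b = _mm_set_ss(2.46);
/// let r = _mm_roundscale_ss::<0>(a, b); // keep 0 fraction bits, round to nearest
/// assert_eq!(_mm_cvtss_f32(r), 2.0);
/// ```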
35681#[inline]
35682#[target_feature(enable = "avx512f")]
35683#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35684#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 255))]
35685#[rustc_legacy_const_generics(2)]
35686pub unsafe fn _mm_roundscale_ss<const IMM8: i32>(a: __m128, b: __m128) -> __m128 {
35687 static_assert_uimm_bits!(IMM8, 8);
35688 let a: f32x4 = a.as_f32x4();
35689 let b: f32x4 = b.as_f32x4();
35690 let zero: f32x4 = _mm_setzero_ps().as_f32x4();
    let r: f32x4 = vrndscaless(a, b, zero, 0b11111111, IMM8, _MM_FROUND_CUR_DIRECTION);
    transmute(r)
35693}
35694
35695/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
35696/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
35697/// _MM_FROUND_TO_NEAREST_INT // round to nearest\
35698/// _MM_FROUND_TO_NEG_INF // round down\
35699/// _MM_FROUND_TO_POS_INF // round up\
35700/// _MM_FROUND_TO_ZERO // truncate\
35701/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
35702///
35703/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_roundscale_ss&expand=4800)
35704#[inline]
35705#[target_feature(enable = "avx512f")]
35706#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35707#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0))]
35708#[rustc_legacy_const_generics(4)]
35709pub unsafe fn _mm_mask_roundscale_ss<const IMM8: i32>(
35710 src: __m128,
35711 k: __mmask8,
35712 a: __m128,
35713 b: __m128,
35714) -> __m128 {
35715 static_assert_uimm_bits!(IMM8, 8);
35716 let a: f32x4 = a.as_f32x4();
35717 let b: f32x4 = b.as_f32x4();
35718 let src: f32x4 = src.as_f32x4();
    let r: f32x4 = vrndscaless(a, b, src, k, IMM8, _MM_FROUND_CUR_DIRECTION);
    transmute(r)
35721}
35722
35723/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
35724/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
35725/// _MM_FROUND_TO_NEAREST_INT // round to nearest\
35726/// _MM_FROUND_TO_NEG_INF // round down\
35727/// _MM_FROUND_TO_POS_INF // round up\
35728/// _MM_FROUND_TO_ZERO // truncate\
35729/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
35730///
35731/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_roundscale_ss&expand=4801)
35732#[inline]
35733#[target_feature(enable = "avx512f")]
35734#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35735#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0))]
35736#[rustc_legacy_const_generics(3)]
35737pub unsafe fn _mm_maskz_roundscale_ss<const IMM8: i32>(
35738 k: __mmask8,
35739 a: __m128,
35740 b: __m128,
35741) -> __m128 {
35742 static_assert_uimm_bits!(IMM8, 8);
35743 let a: f32x4 = a.as_f32x4();
35744 let b: f32x4 = b.as_f32x4();
35745 let zero: f32x4 = _mm_setzero_ps().as_f32x4();
    let r: f32x4 = vrndscaless(a, b, zero, k, IMM8, _MM_FROUND_CUR_DIRECTION);
    transmute(r)
35748}
35749
35750/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
35751/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
35752/// _MM_FROUND_TO_NEAREST_INT // round to nearest\
35753/// _MM_FROUND_TO_NEG_INF // round down\
35754/// _MM_FROUND_TO_POS_INF // round up\
35755/// _MM_FROUND_TO_ZERO // truncate\
35756/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
35757///
35758/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_roundscale_sd&expand=4799)
35759#[inline]
35760#[target_feature(enable = "avx512f")]
35761#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35762#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 255))]
35763#[rustc_legacy_const_generics(2)]
35764pub unsafe fn _mm_roundscale_sd<const IMM8: i32>(a: __m128d, b: __m128d) -> __m128d {
35765 static_assert_uimm_bits!(IMM8, 8);
35766 let a: f64x2 = a.as_f64x2();
35767 let b: f64x2 = b.as_f64x2();
35768 let zero: f64x2 = _mm_setzero_pd().as_f64x2();
    let r: f64x2 = vrndscalesd(a, b, zero, 0b11111111, IMM8, _MM_FROUND_CUR_DIRECTION);
    transmute(r)
35771}
35772
35773/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
35774/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
35775/// _MM_FROUND_TO_NEAREST_INT // round to nearest\
35776/// _MM_FROUND_TO_NEG_INF // round down\
35777/// _MM_FROUND_TO_POS_INF // round up\
35778/// _MM_FROUND_TO_ZERO // truncate\
35779/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
35780///
35781/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_roundscale_sd&expand=4797)
35782#[inline]
35783#[target_feature(enable = "avx512f")]
35784#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35785#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0))]
35786#[rustc_legacy_const_generics(4)]
35787pub unsafe fn _mm_mask_roundscale_sd<const IMM8: i32>(
35788 src: __m128d,
35789 k: __mmask8,
35790 a: __m128d,
35791 b: __m128d,
35792) -> __m128d {
35793 static_assert_uimm_bits!(IMM8, 8);
35794 let a: f64x2 = a.as_f64x2();
35795 let b: f64x2 = b.as_f64x2();
35796 let src: f64x2 = src.as_f64x2();
    let r: f64x2 = vrndscalesd(a, b, src, k, IMM8, _MM_FROUND_CUR_DIRECTION);
    transmute(r)
35799}
35800
35801/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
35802/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
35803/// _MM_FROUND_TO_NEAREST_INT // round to nearest\
35804/// _MM_FROUND_TO_NEG_INF // round down\
35805/// _MM_FROUND_TO_POS_INF // round up\
35806/// _MM_FROUND_TO_ZERO // truncate\
35807/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
35808///
35809/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_roundscale_sd&expand=4798)
35810#[inline]
35811#[target_feature(enable = "avx512f")]
35812#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35813#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0))]
35814#[rustc_legacy_const_generics(3)]
35815pub unsafe fn _mm_maskz_roundscale_sd<const IMM8: i32>(
35816 k: __mmask8,
35817 a: __m128d,
35818 b: __m128d,
35819) -> __m128d {
35820 static_assert_uimm_bits!(IMM8, 8);
35821 let a: f64x2 = a.as_f64x2();
35822 let b: f64x2 = b.as_f64x2();
35823 let zero: f64x2 = _mm_setzero_pd().as_f64x2();
    let r: f64x2 = vrndscalesd(a, b, zero, k, IMM8, _MM_FROUND_CUR_DIRECTION);
    transmute(r)
35826}
35827
35828/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
35829///
35830/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_scalef_ss&expand=4901)
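///
/// A minimal usage sketch, not taken from Intel's documentation (assumes the
/// unstable `stdarch_x86_avx512` feature and an AVX-512F capable CPU); the
/// lower result is `a[0] * 2^floor(b[0])`:
///
/// ```ignore
/// let a = _mm_set_ss(3.0);
/// let b = _mm_set_ss(2.0);
/// let r = _mm_scalef_ss(a, b); // 3.0 * 2^2 = 12.0
/// assert_eq!(_mm_cvtss_f32(r), 12.0);
/// ```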
35831#[inline]
35832#[target_feature(enable = "avx512f")]
35833#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35834#[cfg_attr(test, assert_instr(vscalefss))]
35835pub unsafe fn _mm_scalef_ss(a: __m128, b: __m128) -> __m128 {
35836 let a: f32x4 = a.as_f32x4();
35837 let b: f32x4 = b.as_f32x4();
35838 let zero: f32x4 = _mm_setzero_ps().as_f32x4();
    transmute(vscalefss(a, b, zero, 0b11111111, _MM_FROUND_CUR_DIRECTION))
35840}
35841
35842/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
35843///
35844/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_scalef_ss&expand=4899)
35845#[inline]
35846#[target_feature(enable = "avx512f")]
35847#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35848#[cfg_attr(test, assert_instr(vscalefss))]
35849pub unsafe fn _mm_mask_scalef_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
35850 let a: f32x4 = a.as_f32x4();
35851 let b: f32x4 = b.as_f32x4();
35852 let src: f32x4 = src.as_f32x4();
    transmute(vscalefss(a, b, src, k, _MM_FROUND_CUR_DIRECTION))
35854}
35855
35856/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
35857///
35858/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_scalef_ss&expand=4900)
35859#[inline]
35860#[target_feature(enable = "avx512f")]
35861#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35862#[cfg_attr(test, assert_instr(vscalefss))]
35863pub unsafe fn _mm_maskz_scalef_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    transmute(vscalefss(
        a.as_f32x4(),
        b.as_f32x4(),
        _mm_setzero_ps().as_f32x4(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
35871}
35872
35873/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
35874///
35875/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_scalef_sd&expand=4898)
35876#[inline]
35877#[target_feature(enable = "avx512f")]
35878#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35879#[cfg_attr(test, assert_instr(vscalefsd))]
35880pub unsafe fn _mm_scalef_sd(a: __m128d, b: __m128d) -> __m128d {
    transmute(vscalefsd(
        a.as_f64x2(),
        b.as_f64x2(),
        _mm_setzero_pd().as_f64x2(),
        0b11111111,
        _MM_FROUND_CUR_DIRECTION,
    ))
35888}
35889
35890/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
35891///
35892/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_scalef_sd&expand=4896)
35893#[inline]
35894#[target_feature(enable = "avx512f")]
35895#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35896#[cfg_attr(test, assert_instr(vscalefsd))]
35897pub unsafe fn _mm_mask_scalef_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    transmute(vscalefsd(
        a.as_f64x2(),
        b.as_f64x2(),
        src.as_f64x2(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
35905}
35906
35907/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
35908///
35909/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_scalef_sd&expand=4897)
35910#[inline]
35911#[target_feature(enable = "avx512f")]
35912#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35913#[cfg_attr(test, assert_instr(vscalefsd))]
35914pub unsafe fn _mm_maskz_scalef_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    transmute(vscalefsd(
        a.as_f64x2(),
        b.as_f64x2(),
        _mm_setzero_pd().as_f64x2(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
35922}
35923
35924/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
35925///
35926/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmadd_ss&expand=2582)
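///
/// A minimal usage sketch, not taken from Intel's documentation (assumes the
/// unstable `stdarch_x86_avx512` feature and an AVX-512F capable CPU):
///
/// ```ignore
/// let a = _mm_set_ss(2.0);
/// let b = _mm_set_ss(3.0);
/// let c = _mm_set_ss(4.0);
/// let r = _mm_mask_fmadd_ss(a, 0b1, b, c); // 2.0 * 3.0 + 4.0 = 10.0
/// assert_eq!(_mm_cvtss_f32(r), 10.0);
/// let r = _mm_mask_fmadd_ss(a, 0b0, b, c); // mask bit clear: lower lane is `a[0]`
/// assert_eq!(_mm_cvtss_f32(r), 2.0);
/// ```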
35927#[inline]
35928#[target_feature(enable = "avx512f")]
35929#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35930#[cfg_attr(test, assert_instr(vfmadd213ss))]
35931pub unsafe fn _mm_mask_fmadd_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
35932 let mut fmadd: f32 = simd_extract!(a, 0);
35933 if (k & 0b00000001) != 0 {
35934 let extractb: f32 = simd_extract!(b, 0);
35935 let extractc: f32 = simd_extract!(c, 0);
        fmadd = vfmadd132ss(fmadd, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
35937 }
35938 simd_insert!(a, 0, fmadd)
35939}
35940
35941/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
35942///
35943/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmadd_ss&expand=2584)
35944#[inline]
35945#[target_feature(enable = "avx512f")]
35946#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35947#[cfg_attr(test, assert_instr(vfmadd213ss))]
35948pub unsafe fn _mm_maskz_fmadd_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
35949 let mut fmadd: f32 = 0.;
35950 if (k & 0b00000001) != 0 {
35951 let extracta: f32 = simd_extract!(a, 0);
35952 let extractb: f32 = simd_extract!(b, 0);
35953 let extractc: f32 = simd_extract!(c, 0);
        fmadd = vfmadd132ss(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
35955 }
35956 simd_insert!(a, 0, fmadd)
35957}
35958
35959/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
35960///
35961/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmadd_ss&expand=2583)
35962#[inline]
35963#[target_feature(enable = "avx512f")]
35964#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35965#[cfg_attr(test, assert_instr(vfmadd213ss))]
35966pub unsafe fn _mm_mask3_fmadd_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
35967 let mut fmadd: f32 = simd_extract!(c, 0);
35968 if (k & 0b00000001) != 0 {
35969 let extracta: f32 = simd_extract!(a, 0);
35970 let extractb: f32 = simd_extract!(b, 0);
        fmadd = vfmadd132ss(extracta, extractb, fmadd, _MM_FROUND_CUR_DIRECTION);
35972 }
35973 simd_insert!(c, 0, fmadd)
35974}
35975
35976/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
35977///
35978/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmadd_sd&expand=2578)
35979#[inline]
35980#[target_feature(enable = "avx512f")]
35981#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35982#[cfg_attr(test, assert_instr(vfmadd213sd))]
35983pub unsafe fn _mm_mask_fmadd_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
35984 let mut fmadd: f64 = simd_extract!(a, 0);
35985 if (k & 0b00000001) != 0 {
35986 let extractb: f64 = simd_extract!(b, 0);
35987 let extractc: f64 = simd_extract!(c, 0);
        fmadd = vfmadd132sd(fmadd, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
35989 }
35990 simd_insert!(a, 0, fmadd)
35991}
35992
35993/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
35994///
35995/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmadd_sd&expand=2580)
35996#[inline]
35997#[target_feature(enable = "avx512f")]
35998#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35999#[cfg_attr(test, assert_instr(vfmadd213sd))]
36000pub unsafe fn _mm_maskz_fmadd_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
36001 let mut fmadd: f64 = 0.;
36002 if (k & 0b00000001) != 0 {
36003 let extracta: f64 = simd_extract!(a, 0);
36004 let extractb: f64 = simd_extract!(b, 0);
36005 let extractc: f64 = simd_extract!(c, 0);
        fmadd = vfmadd132sd(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
36007 }
36008 simd_insert!(a, 0, fmadd)
36009}
36010
36011/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
36012///
36013/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmadd_sd&expand=2579)
36014#[inline]
36015#[target_feature(enable = "avx512f")]
36016#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36017#[cfg_attr(test, assert_instr(vfmadd213sd))]
36018pub unsafe fn _mm_mask3_fmadd_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
36019 let mut fmadd: f64 = simd_extract!(c, 0);
36020 if (k & 0b00000001) != 0 {
36021 let extracta: f64 = simd_extract!(a, 0);
36022 let extractb: f64 = simd_extract!(b, 0);
        fmadd = vfmadd132sd(extracta, extractb, fmadd, _MM_FROUND_CUR_DIRECTION);
36024 }
36025 simd_insert!(c, 0, fmadd)
36026}
36027
/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36029///
36030/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmsub_ss&expand=2668)
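///
/// A minimal usage sketch, not taken from Intel's documentation (assumptions
/// as for the other scalar examples in this module):
///
/// ```ignore
/// let a = _mm_set_ss(2.0);
/// let b = _mm_set_ss(3.0);
/// let c = _mm_set_ss(4.0);
/// let r = _mm_mask_fmsub_ss(a, 0b1, b, c); // 2.0 * 3.0 - 4.0 = 2.0
/// assert_eq!(_mm_cvtss_f32(r), 2.0);
/// ```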
36031#[inline]
36032#[target_feature(enable = "avx512f")]
36033#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36034#[cfg_attr(test, assert_instr(vfmsub213ss))]
36035pub unsafe fn _mm_mask_fmsub_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
36036 let mut fmsub: f32 = simd_extract!(a, 0);
36037 if (k & 0b00000001) != 0 {
36038 let extractb: f32 = simd_extract!(b, 0);
36039 let extractc: f32 = simd_extract!(c, 0);
36040 let extractc: f32 = -extractc;
        fmsub = vfmadd132ss(fmsub, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
36042 }
36043 simd_insert!(a, 0, fmsub)
36044}
36045
36046/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36047///
36048/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmsub_ss&expand=2670)
36049#[inline]
36050#[target_feature(enable = "avx512f")]
36051#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36052#[cfg_attr(test, assert_instr(vfmsub213ss))]
36053pub unsafe fn _mm_maskz_fmsub_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
36054 let mut fmsub: f32 = 0.;
36055 if (k & 0b00000001) != 0 {
36056 let extracta: f32 = simd_extract!(a, 0);
36057 let extractb: f32 = simd_extract!(b, 0);
36058 let extractc: f32 = simd_extract!(c, 0);
36059 let extractc: f32 = -extractc;
        fmsub = vfmadd132ss(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
36061 }
36062 simd_insert!(a, 0, fmsub)
36063}
36064
36065/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
36066///
36067/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmsub_ss&expand=2669)
36068#[inline]
36069#[target_feature(enable = "avx512f")]
36070#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36071#[cfg_attr(test, assert_instr(vfmsub213ss))]
36072pub unsafe fn _mm_mask3_fmsub_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
36073 let mut fmsub: f32 = simd_extract!(c, 0);
36074 if (k & 0b00000001) != 0 {
36075 let extracta: f32 = simd_extract!(a, 0);
36076 let extractb: f32 = simd_extract!(b, 0);
36077 let extractc: f32 = -fmsub;
        fmsub = vfmadd132ss(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
36079 }
36080 simd_insert!(c, 0, fmsub)
36081}
36082
36083/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36084///
36085/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmsub_sd&expand=2664)
36086#[inline]
36087#[target_feature(enable = "avx512f")]
36088#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36089#[cfg_attr(test, assert_instr(vfmsub213sd))]
36090pub unsafe fn _mm_mask_fmsub_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
36091 let mut fmsub: f64 = simd_extract!(a, 0);
36092 if (k & 0b00000001) != 0 {
36093 let extractb: f64 = simd_extract!(b, 0);
36094 let extractc: f64 = simd_extract!(c, 0);
36095 let extractc: f64 = -extractc;
        fmsub = vfmadd132sd(fmsub, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
36097 }
36098 simd_insert!(a, 0, fmsub)
36099}
36100
36101/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36102///
36103/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmsub_sd&expand=2666)
36104#[inline]
36105#[target_feature(enable = "avx512f")]
36106#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36107#[cfg_attr(test, assert_instr(vfmsub213sd))]
36108pub unsafe fn _mm_maskz_fmsub_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
36109 let mut fmsub: f64 = 0.;
36110 if (k & 0b00000001) != 0 {
36111 let extracta: f64 = simd_extract!(a, 0);
36112 let extractb: f64 = simd_extract!(b, 0);
36113 let extractc: f64 = simd_extract!(c, 0);
36114 let extractc: f64 = -extractc;
        fmsub = vfmadd132sd(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
36116 }
36117 simd_insert!(a, 0, fmsub)
36118}
36119
36120/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
36121///
36122/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmsub_sd&expand=2665)
36123#[inline]
36124#[target_feature(enable = "avx512f")]
36125#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36126#[cfg_attr(test, assert_instr(vfmsub213sd))]
36127pub unsafe fn _mm_mask3_fmsub_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
36128 let mut fmsub: f64 = simd_extract!(c, 0);
36129 if (k & 0b00000001) != 0 {
36130 let extracta: f64 = simd_extract!(a, 0);
36131 let extractb: f64 = simd_extract!(b, 0);
36132 let extractc: f64 = -fmsub;
        fmsub = vfmadd132sd(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
36134 }
36135 simd_insert!(c, 0, fmsub)
36136}
36137
36138/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36139///
36140/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmadd_ss&expand=2748)
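///
/// A minimal usage sketch, not taken from Intel's documentation (assumptions
/// as for the other scalar examples in this module):
///
/// ```ignore
/// let a = _mm_set_ss(2.0);
/// let b = _mm_set_ss(3.0);
/// let c = _mm_set_ss(4.0);
/// let r = _mm_mask_fnmadd_ss(a, 0b1, b, c); // -(2.0 * 3.0) + 4.0 = -2.0
/// assert_eq!(_mm_cvtss_f32(r), -2.0);
/// ```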
36141#[inline]
36142#[target_feature(enable = "avx512f")]
36143#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36144#[cfg_attr(test, assert_instr(vfnmadd213ss))]
36145pub unsafe fn _mm_mask_fnmadd_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
36146 let mut fnmadd: f32 = simd_extract!(a, 0);
36147 if (k & 0b00000001) != 0 {
36148 let extracta: f32 = -fnmadd;
36149 let extractb: f32 = simd_extract!(b, 0);
36150 let extractc: f32 = simd_extract!(c, 0);
        fnmadd = vfmadd132ss(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
36152 }
36153 simd_insert!(a, 0, fnmadd)
36154}
36155
36156/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36157///
36158/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmadd_ss&expand=2750)
36159#[inline]
36160#[target_feature(enable = "avx512f")]
36161#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36162#[cfg_attr(test, assert_instr(vfnmadd213ss))]
36163pub unsafe fn _mm_maskz_fnmadd_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
36164 let mut fnmadd: f32 = 0.;
36165 if (k & 0b00000001) != 0 {
36166 let extracta: f32 = simd_extract!(a, 0);
36167 let extracta: f32 = -extracta;
36168 let extractb: f32 = simd_extract!(b, 0);
36169 let extractc: f32 = simd_extract!(c, 0);
        fnmadd = vfmadd132ss(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
36171 }
36172 simd_insert!(a, 0, fnmadd)
36173}
36174
36175/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
36176///
36177/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmadd_ss&expand=2749)
36178#[inline]
36179#[target_feature(enable = "avx512f")]
36180#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36181#[cfg_attr(test, assert_instr(vfnmadd213ss))]
36182pub unsafe fn _mm_mask3_fnmadd_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
36183 let mut fnmadd: f32 = simd_extract!(c, 0);
36184 if (k & 0b00000001) != 0 {
36185 let extracta: f32 = simd_extract!(a, 0);
36186 let extracta: f32 = -extracta;
36187 let extractb: f32 = simd_extract!(b, 0);
        fnmadd = vfmadd132ss(extracta, extractb, fnmadd, _MM_FROUND_CUR_DIRECTION);
36189 }
36190 simd_insert!(c, 0, fnmadd)
36191}
36192
36193/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36194///
36195/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmadd_sd&expand=2744)
36196#[inline]
36197#[target_feature(enable = "avx512f")]
36198#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36199#[cfg_attr(test, assert_instr(vfnmadd213sd))]
36200pub unsafe fn _mm_mask_fnmadd_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
36201 let mut fnmadd: f64 = simd_extract!(a, 0);
36202 if (k & 0b00000001) != 0 {
36203 let extracta: f64 = -fnmadd;
36204 let extractb: f64 = simd_extract!(b, 0);
36205 let extractc: f64 = simd_extract!(c, 0);
        fnmadd = vfmadd132sd(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
36207 }
36208 simd_insert!(a, 0, fnmadd)
36209}
36210
36211/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36212///
36213/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmadd_sd&expand=2746)
36214#[inline]
36215#[target_feature(enable = "avx512f")]
36216#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36217#[cfg_attr(test, assert_instr(vfnmadd213sd))]
36218pub unsafe fn _mm_maskz_fnmadd_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
36219 let mut fnmadd: f64 = 0.;
36220 if (k & 0b00000001) != 0 {
36221 let extracta: f64 = simd_extract!(a, 0);
36222 let extracta: f64 = -extracta;
36223 let extractb: f64 = simd_extract!(b, 0);
36224 let extractc: f64 = simd_extract!(c, 0);
        fnmadd = vfmadd132sd(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
36226 }
36227 simd_insert!(a, 0, fnmadd)
36228}
36229
36230/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
36231///
36232/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmadd_sd&expand=2745)
36233#[inline]
36234#[target_feature(enable = "avx512f")]
36235#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36236#[cfg_attr(test, assert_instr(vfnmadd213sd))]
36237pub unsafe fn _mm_mask3_fnmadd_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
36238 let mut fnmadd: f64 = simd_extract!(c, 0);
36239 if (k & 0b00000001) != 0 {
36240 let extracta: f64 = simd_extract!(a, 0);
36241 let extracta: f64 = -extracta;
36242 let extractb: f64 = simd_extract!(b, 0);
        fnmadd = vfmadd132sd(extracta, extractb, fnmadd, _MM_FROUND_CUR_DIRECTION);
36244 }
36245 simd_insert!(c, 0, fnmadd)
36246}
36247
/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36249///
36250/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmsub_ss&expand=2796)
36251#[inline]
36252#[target_feature(enable = "avx512f")]
36253#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36254#[cfg_attr(test, assert_instr(vfnmsub213ss))]
36255pub unsafe fn _mm_mask_fnmsub_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
36256 let mut fnmsub: f32 = simd_extract!(a, 0);
36257 if (k & 0b00000001) != 0 {
36258 let extracta: f32 = -fnmsub;
36259 let extractb: f32 = simd_extract!(b, 0);
36260 let extractc: f32 = simd_extract!(c, 0);
36261 let extractc: f32 = -extractc;
        fnmsub = vfmadd132ss(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
36263 }
36264 simd_insert!(a, 0, fnmsub)
36265}
36266
36267/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36268///
36269/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmsub_ss&expand=2798)
36270#[inline]
36271#[target_feature(enable = "avx512f")]
36272#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36273#[cfg_attr(test, assert_instr(vfnmsub213ss))]
36274pub unsafe fn _mm_maskz_fnmsub_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
36275 let mut fnmsub: f32 = 0.;
36276 if (k & 0b00000001) != 0 {
36277 let extracta: f32 = simd_extract!(a, 0);
36278 let extracta: f32 = -extracta;
36279 let extractb: f32 = simd_extract!(b, 0);
36280 let extractc: f32 = simd_extract!(c, 0);
36281 let extractc: f32 = -extractc;
        fnmsub = vfmadd132ss(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
36283 }
36284 simd_insert!(a, 0, fnmsub)
36285}
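// Illustrative sketch (kept outside the crate's main test module purely for exposition,
// and assuming AVX-512F is detected at run time): the zeromask variant above forces the
// lower lane to zero when mask bit 0 is clear, instead of copying it from any operand.
#[cfg(test)]
mod maskz_fnmsub_ss_demo {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn demo_mm_maskz_fnmsub_ss() {
        let a = _mm_set_ss(2.0);
        let b = _mm_set_ss(3.0);
        let c = _mm_set_ss(1.0);
        // Mask bit 0 set: the lower lane is -(2.0 * 3.0) - 1.0 = -7.0.
        let r = _mm_maskz_fnmsub_ss(0b1, a, b, c);
        assert_eq!(_mm_cvtss_f32(r), -7.0);
        // Mask bit 0 clear: the lower lane is zeroed.
        let r = _mm_maskz_fnmsub_ss(0b0, a, b, c);
        assert_eq!(_mm_cvtss_f32(r), 0.0);
    }
}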
36286
36287/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
36288///
36289/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmsub_ss&expand=2797)
36290#[inline]
36291#[target_feature(enable = "avx512f")]
36292#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36293#[cfg_attr(test, assert_instr(vfnmsub213ss))]
36294pub unsafe fn _mm_mask3_fnmsub_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
36295 let mut fnmsub: f32 = simd_extract!(c, 0);
36296 if (k & 0b00000001) != 0 {
36297 let extracta: f32 = simd_extract!(a, 0);
36298 let extracta: f32 = -extracta;
36299 let extractb: f32 = simd_extract!(b, 0);
36300 let extractc: f32 = -fnmsub;
        fnmsub = vfmadd132ss(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
36302 }
36303 simd_insert!(c, 0, fnmsub)
36304}
36305
/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36307///
36308/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmsub_sd&expand=2792)
36309#[inline]
36310#[target_feature(enable = "avx512f")]
36311#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36312#[cfg_attr(test, assert_instr(vfnmsub213sd))]
36313pub unsafe fn _mm_mask_fnmsub_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
36314 let mut fnmsub: f64 = simd_extract!(a, 0);
36315 if (k & 0b00000001) != 0 {
36316 let extracta: f64 = -fnmsub;
36317 let extractb: f64 = simd_extract!(b, 0);
36318 let extractc: f64 = simd_extract!(c, 0);
36319 let extractc: f64 = -extractc;
        fnmsub = vfmadd132sd(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
36321 }
36322 simd_insert!(a, 0, fnmsub)
36323}
36324
/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36326///
36327/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmsub_sd&expand=2794)
36328#[inline]
36329#[target_feature(enable = "avx512f")]
36330#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36331#[cfg_attr(test, assert_instr(vfnmsub213sd))]
36332pub unsafe fn _mm_maskz_fnmsub_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
36333 let mut fnmsub: f64 = 0.;
36334 if (k & 0b00000001) != 0 {
36335 let extracta: f64 = simd_extract!(a, 0);
36336 let extracta: f64 = -extracta;
36337 let extractb: f64 = simd_extract!(b, 0);
36338 let extractc: f64 = simd_extract!(c, 0);
36339 let extractc: f64 = -extractc;
        fnmsub = vfmadd132sd(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
36341 }
36342 simd_insert!(a, 0, fnmsub)
36343}
36344
36345/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
36346///
36347/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmsub_sd&expand=2793)
36348#[inline]
36349#[target_feature(enable = "avx512f")]
36350#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36351#[cfg_attr(test, assert_instr(vfnmsub213sd))]
36352pub unsafe fn _mm_mask3_fnmsub_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
36353 let mut fnmsub: f64 = simd_extract!(c, 0);
36354 if (k & 0b00000001) != 0 {
36355 let extracta: f64 = simd_extract!(a, 0);
36356 let extracta: f64 = -extracta;
36357 let extractb: f64 = simd_extract!(b, 0);
36358 let extractc: f64 = -fnmsub;
        fnmsub = vfmadd132sd(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
36360 }
36361 simd_insert!(c, 0, fnmsub)
36362}
36363
36364/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
36365///
36366/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
36367/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
36368/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
36369/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
36370/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
36371/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
36372///
36373/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_round_ss&expand=151)
36374#[inline]
36375#[target_feature(enable = "avx512f")]
36376#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36377#[cfg_attr(test, assert_instr(vaddss, ROUNDING = 8))]
36378#[rustc_legacy_const_generics(2)]
36379pub unsafe fn _mm_add_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
36380 static_assert_rounding!(ROUNDING);
36381 let a: f32x4 = a.as_f32x4();
36382 let b: f32x4 = b.as_f32x4();
36383 let zero: f32x4 = _mm_setzero_ps().as_f32x4();
    let r: f32x4 = vaddss(a, b, zero, 0b1, ROUNDING);
    transmute(r)
36386}
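// Illustrative sketch of how the `ROUNDING` const parameter is supplied (a demo only,
// assuming AVX-512F support at run time); the value must be one of the combinations
// listed in the doc comment above.
#[cfg(test)]
mod add_round_ss_demo {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn demo_mm_add_round_ss() {
        let a = _mm_set_ss(1.5);
        let b = _mm_set_ss(2.25);
        // 1.5 + 2.25 is exactly representable, so every rounding mode yields 3.75.
        let r = _mm_add_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
        assert_eq!(_mm_cvtss_f32(r), 3.75);
        let r = _mm_add_round_ss::<{ _MM_FROUND_CUR_DIRECTION }>(a, b);
        assert_eq!(_mm_cvtss_f32(r), 3.75);
    }
}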
36387
36388/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
36389///
36390/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
36391/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
36392/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
36393/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
36394/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
36395/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
36396///
36397/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_add_round_ss&expand=152)
36398#[inline]
36399#[target_feature(enable = "avx512f")]
36400#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36401#[cfg_attr(test, assert_instr(vaddss, ROUNDING = 8))]
36402#[rustc_legacy_const_generics(4)]
36403pub unsafe fn _mm_mask_add_round_ss<const ROUNDING: i32>(
36404 src: __m128,
36405 k: __mmask8,
36406 a: __m128,
36407 b: __m128,
36408) -> __m128 {
36409 static_assert_rounding!(ROUNDING);
36410 let a: f32x4 = a.as_f32x4();
36411 let b: f32x4 = b.as_f32x4();
36412 let src: f32x4 = src.as_f32x4();
    let r: f32x4 = vaddss(a, b, src, k, ROUNDING);
    transmute(r)
36415}
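// Illustrative sketch of the writemask fallback documented above (demo only, assuming
// AVX-512F at run time): with mask bit 0 clear the lower lane comes from `src`, not from
// the rounded addition.
#[cfg(test)]
mod mask_add_round_ss_demo {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn demo_mm_mask_add_round_ss() {
        const R: i32 = _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
        let src = _mm_set_ss(42.0);
        let a = _mm_set_ss(1.0);
        let b = _mm_set_ss(2.0);
        // Mask bit 0 clear: the lower lane is copied from `src`.
        let r = _mm_mask_add_round_ss::<{ R }>(src, 0b0, a, b);
        assert_eq!(_mm_cvtss_f32(r), 42.0);
        // Mask bit 0 set: the lower lane is 1.0 + 2.0 = 3.0.
        let r = _mm_mask_add_round_ss::<{ R }>(src, 0b1, a, b);
        assert_eq!(_mm_cvtss_f32(r), 3.0);
    }
}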
36416
36417/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
36418///
36419/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
36420/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
36421/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
36422/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
36423/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
36424/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
36425///
36426/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_round_ss&expand=153)
36427#[inline]
36428#[target_feature(enable = "avx512f")]
36429#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36430#[cfg_attr(test, assert_instr(vaddss, ROUNDING = 8))]
36431#[rustc_legacy_const_generics(3)]
36432pub unsafe fn _mm_maskz_add_round_ss<const ROUNDING: i32>(
36433 k: __mmask8,
36434 a: __m128,
36435 b: __m128,
36436) -> __m128 {
36437 static_assert_rounding!(ROUNDING);
36438 let a: f32x4 = a.as_f32x4();
36439 let b: f32x4 = b.as_f32x4();
36440 let zero: f32x4 = _mm_setzero_ps().as_f32x4();
    let r: f32x4 = vaddss(a, b, zero, k, ROUNDING);
    transmute(r)
36443}
36444
36445/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
36446///
36447/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
36448/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
36449/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
36450/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
36451/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
36452/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
36453///
36454/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_round_sd&expand=148)
36455#[inline]
36456#[target_feature(enable = "avx512f")]
36457#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36458#[cfg_attr(test, assert_instr(vaddsd, ROUNDING = 8))]
36459#[rustc_legacy_const_generics(2)]
36460pub unsafe fn _mm_add_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
36461 static_assert_rounding!(ROUNDING);
36462 let a: f64x2 = a.as_f64x2();
36463 let b: f64x2 = b.as_f64x2();
36464 let zero: f64x2 = _mm_setzero_pd().as_f64x2();
    let r: f64x2 = vaddsd(a, b, zero, 0b1, ROUNDING);
    transmute(r)
36467}
36468
36469/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
36470///
36471/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
36472/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
36473/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
36474/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
36475/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
36476/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
36477///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_round_sd&expand=149)
36479#[inline]
36480#[target_feature(enable = "avx512f")]
36481#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36482#[cfg_attr(test, assert_instr(vaddsd, ROUNDING = 8))]
36483#[rustc_legacy_const_generics(4)]
36484pub unsafe fn _mm_mask_add_round_sd<const ROUNDING: i32>(
36485 src: __m128d,
36486 k: __mmask8,
36487 a: __m128d,
36488 b: __m128d,
36489) -> __m128d {
36490 static_assert_rounding!(ROUNDING);
36491 let a: f64x2 = a.as_f64x2();
36492 let b: f64x2 = b.as_f64x2();
36493 let src: f64x2 = src.as_f64x2();
    let r: f64x2 = vaddsd(a, b, src, k, ROUNDING);
    transmute(r)
36496}
36497
36498/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
36499///
36500/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
36501/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
36502/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
36503/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
36504/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
36505/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
36506///
36507/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_round_sd&expand=150)
36508#[inline]
36509#[target_feature(enable = "avx512f")]
36510#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36511#[cfg_attr(test, assert_instr(vaddsd, ROUNDING = 8))]
36512#[rustc_legacy_const_generics(3)]
36513pub unsafe fn _mm_maskz_add_round_sd<const ROUNDING: i32>(
36514 k: __mmask8,
36515 a: __m128d,
36516 b: __m128d,
36517) -> __m128d {
36518 static_assert_rounding!(ROUNDING);
36519 let a: f64x2 = a.as_f64x2();
36520 let b: f64x2 = b.as_f64x2();
36521 let zero: f64x2 = _mm_setzero_pd().as_f64x2();
    let r: f64x2 = vaddsd(a, b, zero, k, ROUNDING);
    transmute(r)
36524}
36525
36526/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
36527///
36528/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
36529/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
36530/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
36531/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
36532/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
36533/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
36534///
36535/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_sub_round_ss&expand=5745)
36536#[inline]
36537#[target_feature(enable = "avx512f")]
36538#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36539#[cfg_attr(test, assert_instr(vsubss, ROUNDING = 8))]
36540#[rustc_legacy_const_generics(2)]
36541pub unsafe fn _mm_sub_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
36542 static_assert_rounding!(ROUNDING);
36543 let a: f32x4 = a.as_f32x4();
36544 let b: f32x4 = b.as_f32x4();
36545 let zero: f32x4 = _mm_setzero_ps().as_f32x4();
    let r: f32x4 = vsubss(a, b, zero, 0b1, ROUNDING);
    transmute(r)
36548}
36549
36550/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
36551///
36552/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
36553/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
36554/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
36555/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
36556/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
36557/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
36558///
36559/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sub_round_ss&expand=5743)
36560#[inline]
36561#[target_feature(enable = "avx512f")]
36562#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36563#[cfg_attr(test, assert_instr(vsubss, ROUNDING = 8))]
36564#[rustc_legacy_const_generics(4)]
36565pub unsafe fn _mm_mask_sub_round_ss<const ROUNDING: i32>(
36566 src: __m128,
36567 k: __mmask8,
36568 a: __m128,
36569 b: __m128,
36570) -> __m128 {
36571 static_assert_rounding!(ROUNDING);
36572 let a: f32x4 = a.as_f32x4();
36573 let b: f32x4 = b.as_f32x4();
36574 let src: f32x4 = src.as_f32x4();
    let r: f32x4 = vsubss(a, b, src, k, ROUNDING);
    transmute(r)
36577}
36578
36579/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
36580///
36581/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
36582/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
36583/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
36584/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
36585/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
36586/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
36587///
36588/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sub_round_ss&expand=5744)
36589#[inline]
36590#[target_feature(enable = "avx512f")]
36591#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36592#[cfg_attr(test, assert_instr(vsubss, ROUNDING = 8))]
36593#[rustc_legacy_const_generics(3)]
36594pub unsafe fn _mm_maskz_sub_round_ss<const ROUNDING: i32>(
36595 k: __mmask8,
36596 a: __m128,
36597 b: __m128,
36598) -> __m128 {
36599 static_assert_rounding!(ROUNDING);
36600 let a: f32x4 = a.as_f32x4();
36601 let b: f32x4 = b.as_f32x4();
36602 let zero: f32x4 = _mm_setzero_ps().as_f32x4();
    let r: f32x4 = vsubss(a, b, zero, k, ROUNDING);
    transmute(r)
36605}
36606
36607/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
36608///
36609/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
36610/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
36611/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
36612/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
36613/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
36614/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
36615///
36616/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_sub_round_sd&expand=5742)
36617#[inline]
36618#[target_feature(enable = "avx512f")]
36619#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36620#[cfg_attr(test, assert_instr(vsubsd, ROUNDING = 8))]
36621#[rustc_legacy_const_generics(2)]
36622pub unsafe fn _mm_sub_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
36623 static_assert_rounding!(ROUNDING);
36624 let a: f64x2 = a.as_f64x2();
36625 let b: f64x2 = b.as_f64x2();
36626 let zero: f64x2 = _mm_setzero_pd().as_f64x2();
    let r: f64x2 = vsubsd(a, b, zero, 0b1, ROUNDING);
    transmute(r)
36629}
36630
36631/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
36632///
36633/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
36634/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
36635/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
36636/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
36637/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
36638/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
36639///
36640/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sub_round_sd&expand=5740)
36641#[inline]
36642#[target_feature(enable = "avx512f")]
36643#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36644#[cfg_attr(test, assert_instr(vsubsd, ROUNDING = 8))]
36645#[rustc_legacy_const_generics(4)]
36646pub unsafe fn _mm_mask_sub_round_sd<const ROUNDING: i32>(
36647 src: __m128d,
36648 k: __mmask8,
36649 a: __m128d,
36650 b: __m128d,
36651) -> __m128d {
36652 static_assert_rounding!(ROUNDING);
36653 let a: f64x2 = a.as_f64x2();
36654 let b: f64x2 = b.as_f64x2();
36655 let src: f64x2 = src.as_f64x2();
    let r: f64x2 = vsubsd(a, b, src, k, ROUNDING);
    transmute(r)
36658}
36659
36660/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
36661///
36662/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
36663/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
36664/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
36665/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
36666/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
36667/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
36668///
36669/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sub_round_sd&expand=5741)
36670#[inline]
36671#[target_feature(enable = "avx512f")]
36672#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36673#[cfg_attr(test, assert_instr(vsubsd, ROUNDING = 8))]
36674#[rustc_legacy_const_generics(3)]
36675pub unsafe fn _mm_maskz_sub_round_sd<const ROUNDING: i32>(
36676 k: __mmask8,
36677 a: __m128d,
36678 b: __m128d,
36679) -> __m128d {
36680 static_assert_rounding!(ROUNDING);
36681 let a: f64x2 = a.as_f64x2();
36682 let b: f64x2 = b.as_f64x2();
36683 let zero: f64x2 = _mm_setzero_pd().as_f64x2();
    let r: f64x2 = vsubsd(a, b, zero, k, ROUNDING);
    transmute(r)
36686}
36687
36688/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
36689///
36690/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
36691/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
36692/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
36693/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
36694/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
36695/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
36696///
36697/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mul_round_ss&expand=3946)
36698#[inline]
36699#[target_feature(enable = "avx512f")]
36700#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36701#[cfg_attr(test, assert_instr(vmulss, ROUNDING = 8))]
36702#[rustc_legacy_const_generics(2)]
36703pub unsafe fn _mm_mul_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
36704 static_assert_rounding!(ROUNDING);
36705 let a: f32x4 = a.as_f32x4();
36706 let b: f32x4 = b.as_f32x4();
36707 let zero: f32x4 = _mm_setzero_ps().as_f32x4();
    let r: f32x4 = vmulss(a, b, zero, 0b1, ROUNDING);
    transmute(r)
36710}
36711
36712/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
36713///
36714/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
36715/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
36716/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
36717/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
36718/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
36719/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
36720///
36721/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_mul_round_ss&expand=3944)
36722#[inline]
36723#[target_feature(enable = "avx512f")]
36724#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36725#[cfg_attr(test, assert_instr(vmulss, ROUNDING = 8))]
36726#[rustc_legacy_const_generics(4)]
36727pub unsafe fn _mm_mask_mul_round_ss<const ROUNDING: i32>(
36728 src: __m128,
36729 k: __mmask8,
36730 a: __m128,
36731 b: __m128,
36732) -> __m128 {
36733 static_assert_rounding!(ROUNDING);
36734 let a: f32x4 = a.as_f32x4();
36735 let b: f32x4 = b.as_f32x4();
36736 let src: f32x4 = src.as_f32x4();
    let r: f32x4 = vmulss(a, b, src, k, ROUNDING);
    transmute(r)
36739}
36740
36741/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
36742///
36743/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
36744/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
36745/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
36746/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
36747/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
36748/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
36749///
36750/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_mul_round_ss&expand=3945)
36751#[inline]
36752#[target_feature(enable = "avx512f")]
36753#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36754#[cfg_attr(test, assert_instr(vmulss, ROUNDING = 8))]
36755#[rustc_legacy_const_generics(3)]
36756pub unsafe fn _mm_maskz_mul_round_ss<const ROUNDING: i32>(
36757 k: __mmask8,
36758 a: __m128,
36759 b: __m128,
36760) -> __m128 {
36761 static_assert_rounding!(ROUNDING);
36762 let a: f32x4 = a.as_f32x4();
36763 let b: f32x4 = b.as_f32x4();
36764 let zero: f32x4 = _mm_setzero_ps().as_f32x4();
    let r: f32x4 = vmulss(a, b, zero, k, ROUNDING);
    transmute(r)
36767}
36768
36769/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
36770///
36771/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
36772/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
36773/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
36774/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
36775/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
36776/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
36777///
36778/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mul_round_sd&expand=3943)
36779#[inline]
36780#[target_feature(enable = "avx512f")]
36781#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36782#[cfg_attr(test, assert_instr(vmulsd, ROUNDING = 8))]
36783#[rustc_legacy_const_generics(2)]
36784pub unsafe fn _mm_mul_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
36785 static_assert_rounding!(ROUNDING);
36786 let a: f64x2 = a.as_f64x2();
36787 let b: f64x2 = b.as_f64x2();
36788 let zero: f64x2 = _mm_setzero_pd().as_f64x2();
    let r: f64x2 = vmulsd(a, b, zero, 0b1, ROUNDING);
    transmute(r)
36791}
36792
36793/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
36794///
36795/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
36796/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
36797/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
36798/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
36799/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
36800/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
36801///
36802/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_mul_round_sd&expand=3941)
36803#[inline]
36804#[target_feature(enable = "avx512f")]
36805#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36806#[cfg_attr(test, assert_instr(vmulsd, ROUNDING = 8))]
36807#[rustc_legacy_const_generics(4)]
36808pub unsafe fn _mm_mask_mul_round_sd<const ROUNDING: i32>(
36809 src: __m128d,
36810 k: __mmask8,
36811 a: __m128d,
36812 b: __m128d,
36813) -> __m128d {
36814 static_assert_rounding!(ROUNDING);
36815 let a: f64x2 = a.as_f64x2();
36816 let b: f64x2 = b.as_f64x2();
36817 let src: f64x2 = src.as_f64x2();
    let r: f64x2 = vmulsd(a, b, src, k, ROUNDING);
    transmute(r)
36820}
36821
36822/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
36823///
36824/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
36825/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
36826/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
36827/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
36828/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
36829/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
36830///
36831/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_mul_round_sd&expand=3942)
36832#[inline]
36833#[target_feature(enable = "avx512f")]
36834#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36835#[cfg_attr(test, assert_instr(vmulsd, ROUNDING = 8))]
36836#[rustc_legacy_const_generics(3)]
36837pub unsafe fn _mm_maskz_mul_round_sd<const ROUNDING: i32>(
36838 k: __mmask8,
36839 a: __m128d,
36840 b: __m128d,
36841) -> __m128d {
36842 static_assert_rounding!(ROUNDING);
36843 let a: f64x2 = a.as_f64x2();
36844 let b: f64x2 = b.as_f64x2();
36845 let zero: f64x2 = _mm_setzero_pd().as_f64x2();
    let r: f64x2 = vmulsd(a, b, zero, k, ROUNDING);
    transmute(r)
36848}
36849
36850/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
36851///
36852/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
36853/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
36854/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
36855/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
36856/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
36857/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
36858///
36859/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_div_round_ss&expand=2174)
36860#[inline]
36861#[target_feature(enable = "avx512f")]
36862#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36863#[cfg_attr(test, assert_instr(vdivss, ROUNDING = 8))]
36864#[rustc_legacy_const_generics(2)]
36865pub unsafe fn _mm_div_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
36866 static_assert_rounding!(ROUNDING);
36867 let a: f32x4 = a.as_f32x4();
36868 let b: f32x4 = b.as_f32x4();
36869 let zero: f32x4 = _mm_setzero_ps().as_f32x4();
    let r: f32x4 = vdivss(a, b, zero, 0b1, ROUNDING);
    transmute(r)
36872}
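// Illustrative sketch of how the rounding direction changes an inexact result (demo
// only, assuming AVX-512F at run time): 1.0 / 3.0 is not representable in `f32`, so the
// two directed modes bracket the exact quotient.
#[cfg(test)]
mod div_round_ss_demo {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn demo_mm_div_round_ss() {
        let a = _mm_set_ss(1.0);
        let b = _mm_set_ss(3.0);
        let down = _mm_div_round_ss::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a, b);
        let up = _mm_div_round_ss::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a, b);
        // Rounding toward -inf gives a strictly smaller lower lane than rounding toward +inf.
        assert!(_mm_cvtss_f32(down) < _mm_cvtss_f32(up));
    }
}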
36873
36874/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
36875///
36876/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
36877/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
36878/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
36879/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
36880/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
36881/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
36882///
36883/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_div_round_ss&expand=2175)
36884#[inline]
36885#[target_feature(enable = "avx512f")]
36886#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36887#[cfg_attr(test, assert_instr(vdivss, ROUNDING = 8))]
36888#[rustc_legacy_const_generics(4)]
36889pub unsafe fn _mm_mask_div_round_ss<const ROUNDING: i32>(
36890 src: __m128,
36891 k: __mmask8,
36892 a: __m128,
36893 b: __m128,
36894) -> __m128 {
36895 static_assert_rounding!(ROUNDING);
36896 let a: f32x4 = a.as_f32x4();
36897 let b: f32x4 = b.as_f32x4();
36898 let src: f32x4 = src.as_f32x4();
    let r: f32x4 = vdivss(a, b, src, k, ROUNDING);
    transmute(r)
36901}
36902
36903/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
36904///
36905/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
36906/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
36907/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
36908/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
36909/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
36910/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
36911///
36912/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_div_round_ss&expand=2176)
36913#[inline]
36914#[target_feature(enable = "avx512f")]
36915#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36916#[cfg_attr(test, assert_instr(vdivss, ROUNDING = 8))]
36917#[rustc_legacy_const_generics(3)]
36918pub unsafe fn _mm_maskz_div_round_ss<const ROUNDING: i32>(
36919 k: __mmask8,
36920 a: __m128,
36921 b: __m128,
36922) -> __m128 {
36923 static_assert_rounding!(ROUNDING);
36924 let a: f32x4 = a.as_f32x4();
36925 let b: f32x4 = b.as_f32x4();
36926 let zero: f32x4 = _mm_setzero_ps().as_f32x4();
    let r: f32x4 = vdivss(a, b, zero, k, ROUNDING);
    transmute(r)
36929}
36930
36931/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
36932///
36933/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
36934/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
36935/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
36936/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
36937/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
36938/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
36939///
36940/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_div_round_sd&expand=2171)
36941#[inline]
36942#[target_feature(enable = "avx512f")]
36943#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36944#[cfg_attr(test, assert_instr(vdivsd, ROUNDING = 8))]
36945#[rustc_legacy_const_generics(2)]
36946pub unsafe fn _mm_div_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
36947 static_assert_rounding!(ROUNDING);
36948 let a: f64x2 = a.as_f64x2();
36949 let b: f64x2 = b.as_f64x2();
36950 let zero: f64x2 = _mm_setzero_pd().as_f64x2();
    let r: f64x2 = vdivsd(a, b, zero, 0b1, ROUNDING);
    transmute(r)
36953}
36954
36955/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
36956///
36957/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
36958/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
36959/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
36960/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
36961/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
36962/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
36963///
36964/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_div_round_sd&expand=2172)
36965#[inline]
36966#[target_feature(enable = "avx512f")]
36967#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36968#[cfg_attr(test, assert_instr(vdivsd, ROUNDING = 8))]
36969#[rustc_legacy_const_generics(4)]
36970pub unsafe fn _mm_mask_div_round_sd<const ROUNDING: i32>(
36971 src: __m128d,
36972 k: __mmask8,
36973 a: __m128d,
36974 b: __m128d,
36975) -> __m128d {
36976 static_assert_rounding!(ROUNDING);
36977 let a: f64x2 = a.as_f64x2();
36978 let b: f64x2 = b.as_f64x2();
36979 let src: f64x2 = src.as_f64x2();
    let r: f64x2 = vdivsd(a, b, src, k, ROUNDING);
    transmute(r)
36982}
36983
36984/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
36985///
36986/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
36987/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
36988/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
36989/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
36990/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
36991/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
36992///
36993/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_div_round_sd&expand=2173)
36994#[inline]
36995#[target_feature(enable = "avx512f")]
36996#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36997#[cfg_attr(test, assert_instr(vdivsd, ROUNDING = 8))]
36998#[rustc_legacy_const_generics(3)]
36999pub unsafe fn _mm_maskz_div_round_sd<const ROUNDING: i32>(
37000 k: __mmask8,
37001 a: __m128d,
37002 b: __m128d,
37003) -> __m128d {
37004 static_assert_rounding!(ROUNDING);
37005 let a: f64x2 = a.as_f64x2();
37006 let b: f64x2 = b.as_f64x2();
37007 let zero: f64x2 = _mm_setzero_pd().as_f64x2();
    let r: f64x2 = vdivsd(a, b, zero, k, ROUNDING);
    transmute(r)
37010}
37011
37012/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
37013/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37014///
37015/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_max_round_ss&expand=3668)
37016#[inline]
37017#[target_feature(enable = "avx512f")]
37018#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37019#[cfg_attr(test, assert_instr(vmaxss, SAE = 8))]
37020#[rustc_legacy_const_generics(2)]
37021pub unsafe fn _mm_max_round_ss<const SAE: i32>(a: __m128, b: __m128) -> __m128 {
37022 static_assert_sae!(SAE);
37023 let a: f32x4 = a.as_f32x4();
37024 let b: f32x4 = b.as_f32x4();
37025 let zero: f32x4 = _mm_setzero_ps().as_f32x4();
    let r: f32x4 = vmaxss(a, b, zero, 0b1, SAE);
    transmute(r)
37028}
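// Illustrative sketch of the `SAE` parameter (demo only, assuming AVX-512F at run time):
// it only controls exception suppression, so the numeric result is the same whether
// `_MM_FROUND_CUR_DIRECTION` or `_MM_FROUND_NO_EXC` is passed.
#[cfg(test)]
mod max_round_ss_demo {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn demo_mm_max_round_ss() {
        let a = _mm_set_ss(-1.0);
        let b = _mm_set_ss(2.0);
        let r = _mm_max_round_ss::<{ _MM_FROUND_CUR_DIRECTION }>(a, b);
        assert_eq!(_mm_cvtss_f32(r), 2.0);
        let r = _mm_max_round_ss::<{ _MM_FROUND_NO_EXC }>(a, b);
        assert_eq!(_mm_cvtss_f32(r), 2.0);
    }
}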
37029
37030/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
37031/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37032///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_round_ss&expand=3672)
37034#[inline]
37035#[target_feature(enable = "avx512f")]
37036#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37037#[cfg_attr(test, assert_instr(vmaxss, SAE = 8))]
37038#[rustc_legacy_const_generics(4)]
37039pub unsafe fn _mm_mask_max_round_ss<const SAE: i32>(
37040 src: __m128,
37041 k: __mmask8,
37042 a: __m128,
37043 b: __m128,
37044) -> __m128 {
37045 static_assert_sae!(SAE);
37046 let a: f32x4 = a.as_f32x4();
37047 let b: f32x4 = b.as_f32x4();
37048 let src: f32x4 = src.as_f32x4();
    let r: f32x4 = vmaxss(a, b, src, k, SAE);
    transmute(r)
37051}
37052
37053/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
37054/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37055///
37056/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_max_round_ss&expand=3667)
37057#[inline]
37058#[target_feature(enable = "avx512f")]
37059#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37060#[cfg_attr(test, assert_instr(vmaxss, SAE = 8))]
37061#[rustc_legacy_const_generics(3)]
37062pub unsafe fn _mm_maskz_max_round_ss<const SAE: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
37063 static_assert_sae!(SAE);
37064 let a: f32x4 = a.as_f32x4();
37065 let b: f32x4 = b.as_f32x4();
37066 let zero: f32x4 = _mm_setzero_ps().as_f32x4();
    let r: f32x4 = vmaxss(a, b, zero, k, SAE);
    transmute(r)
37069}
37070
37071/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst, and copy the upper element from a to the upper element of dst.\
37072/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37073///
37074/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_max_round_sd&expand=3665)
37075#[inline]
37076#[target_feature(enable = "avx512f")]
37077#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37078#[cfg_attr(test, assert_instr(vmaxsd, SAE = 8))]
37079#[rustc_legacy_const_generics(2)]
37080pub unsafe fn _mm_max_round_sd<const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
37081 static_assert_sae!(SAE);
37082 let a: f64x2 = a.as_f64x2();
37083 let b: f64x2 = b.as_f64x2();
37084 let zero: f64x2 = _mm_setzero_pd().as_f64x2();
    let r: f64x2 = vmaxsd(a, b, zero, 0b1, SAE);
    transmute(r)
37087}
37088
37089/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
37090/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37091///
37092/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_max_round_sd&expand=3663)
37093#[inline]
37094#[target_feature(enable = "avx512f")]
37095#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37096#[cfg_attr(test, assert_instr(vmaxsd, SAE = 8))]
37097#[rustc_legacy_const_generics(4)]
37098pub unsafe fn _mm_mask_max_round_sd<const SAE: i32>(
37099 src: __m128d,
37100 k: __mmask8,
37101 a: __m128d,
37102 b: __m128d,
37103) -> __m128d {
37104 static_assert_sae!(SAE);
37105 let a: f64x2 = a.as_f64x2();
37106 let b: f64x2 = b.as_f64x2();
37107 let src: f64x2 = src.as_f64x2();
    let r: f64x2 = vmaxsd(a, b, src, k, SAE);
    transmute(r)
37110}
37111
37112/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
37113/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37114///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_round_sd&expand=3670)
37116#[inline]
37117#[target_feature(enable = "avx512f")]
37118#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37119#[cfg_attr(test, assert_instr(vmaxsd, SAE = 8))]
37120#[rustc_legacy_const_generics(3)]
37121pub unsafe fn _mm_maskz_max_round_sd<const SAE: i32>(
37122 k: __mmask8,
37123 a: __m128d,
37124 b: __m128d,
37125) -> __m128d {
37126 static_assert_sae!(SAE);
37127 let a: f64x2 = a.as_f64x2();
37128 let b: f64x2 = b.as_f64x2();
37129 let zero: f64x2 = _mm_setzero_pd().as_f64x2();
    let r: f64x2 = vmaxsd(a, b, zero, k, SAE);
    transmute(r)
37132}
37133
37134/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
37135/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37136///
37137/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_min_round_ss&expand=3782)
37138#[inline]
37139#[target_feature(enable = "avx512f")]
37140#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37141#[cfg_attr(test, assert_instr(vminss, SAE = 8))]
37142#[rustc_legacy_const_generics(2)]
37143pub unsafe fn _mm_min_round_ss<const SAE: i32>(a: __m128, b: __m128) -> __m128 {
37144 static_assert_sae!(SAE);
37145 let a: f32x4 = a.as_f32x4();
37146 let b: f32x4 = b.as_f32x4();
37147 let zero: f32x4 = _mm_setzero_ps().as_f32x4();
    let r: f32x4 = vminss(a, b, zero, 0b1, SAE);
    transmute(r)
37150}
37151
37152/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
37153/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37154///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_round_ss&expand=3780)
37156#[inline]
37157#[target_feature(enable = "avx512f")]
37158#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37159#[cfg_attr(test, assert_instr(vminss, SAE = 8))]
37160#[rustc_legacy_const_generics(4)]
37161pub unsafe fn _mm_mask_min_round_ss<const SAE: i32>(
37162 src: __m128,
37163 k: __mmask8,
37164 a: __m128,
37165 b: __m128,
37166) -> __m128 {
37167 static_assert_sae!(SAE);
37168 let a: f32x4 = a.as_f32x4();
37169 let b: f32x4 = b.as_f32x4();
37170 let src: f32x4 = src.as_f32x4();
    let r: f32x4 = vminss(a, b, src, k, SAE);
    transmute(r)
37173}
37174
37175/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
37176/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37177///
37178/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_min_round_ss&expand=3781)
37179#[inline]
37180#[target_feature(enable = "avx512f")]
37181#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37182#[cfg_attr(test, assert_instr(vminss, SAE = 8))]
37183#[rustc_legacy_const_generics(3)]
37184pub unsafe fn _mm_maskz_min_round_ss<const SAE: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
37185 static_assert_sae!(SAE);
37186 let a: f32x4 = a.as_f32x4();
37187 let b: f32x4 = b.as_f32x4();
37188 let zero: f32x4 = _mm_setzero_ps().as_f32x4();
    let r: f32x4 = vminss(a, b, zero, k, SAE);
    transmute(r)
37191}
37192
/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst, and copy the upper element from a to the upper element of dst.\
37194/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37195///
37196/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_min_round_sd&expand=3779)
37197#[inline]
37198#[target_feature(enable = "avx512f")]
37199#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37200#[cfg_attr(test, assert_instr(vminsd, SAE = 8))]
37201#[rustc_legacy_const_generics(2)]
37202pub unsafe fn _mm_min_round_sd<const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
37203 static_assert_sae!(SAE);
37204 let a: f64x2 = a.as_f64x2();
37205 let b: f64x2 = b.as_f64x2();
37206 let zero: f64x2 = _mm_setzero_pd().as_f64x2();
    let r: f64x2 = vminsd(a, b, zero, 0b1, SAE);
    transmute(r)
37209}
37210
37211/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
37212/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37213///
37214/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_min_round_sd&expand=3777)
37215#[inline]
37216#[target_feature(enable = "avx512f")]
37217#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37218#[cfg_attr(test, assert_instr(vminsd, SAE = 8))]
37219#[rustc_legacy_const_generics(4)]
37220pub unsafe fn _mm_mask_min_round_sd<const SAE: i32>(
37221 src: __m128d,
37222 k: __mmask8,
37223 a: __m128d,
37224 b: __m128d,
37225) -> __m128d {
37226 static_assert_sae!(SAE);
37227 let a: f64x2 = a.as_f64x2();
37228 let b: f64x2 = b.as_f64x2();
37229 let src: f64x2 = src.as_f64x2();
    let r: f64x2 = vminsd(a, b, src, k, SAE);
    transmute(r)
37232}
37233
37234/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
37235/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37236///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_min_round_sd&expand=3778)
37238#[inline]
37239#[target_feature(enable = "avx512f")]
37240#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37241#[cfg_attr(test, assert_instr(vminsd, SAE = 8))]
37242#[rustc_legacy_const_generics(3)]
37243pub unsafe fn _mm_maskz_min_round_sd<const SAE: i32>(
37244 k: __mmask8,
37245 a: __m128d,
37246 b: __m128d,
37247) -> __m128d {
37248 static_assert_sae!(SAE);
37249 let a: f64x2 = a.as_f64x2();
37250 let b: f64x2 = b.as_f64x2();
37251 let zero: f64x2 = _mm_setzero_pd().as_f64x2();
    let r: f64x2 = vminsd(a, b, zero, k, SAE);
    transmute(r)
37254}
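
// Illustrative sketch (not part of the crate's API): picking the SAE constant for the
// masked scalar minimum. With `k = 0`, mask bit 0 is clear, so the lower lane of the
// result comes from `src` rather than from min(a, b). The helper name and values are
// hypothetical; `avx512f` support is assumed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn mask_min_round_sd_sketch() -> f64 {
    let src = _mm_set_sd(99.0);
    let a = _mm_set_sd(2.0);
    let b = _mm_set_sd(-1.0);
    // _MM_FROUND_NO_EXC suppresses floating-point exceptions for this comparison.
    let r = _mm_mask_min_round_sd::<_MM_FROUND_NO_EXC>(src, 0, a, b);
    _mm_cvtsd_f64(r) // 99.0: mask bit 0 is not set, so `src` is copied through
}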
37255
37256/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
37257///
37258/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
37259/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
37260/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
37261/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
37262/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
37263/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
37264///
37265/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_sqrt_round_ss&expand=5383)
37266#[inline]
37267#[target_feature(enable = "avx512f")]
37268#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37269#[cfg_attr(test, assert_instr(vsqrtss, ROUNDING = 8))]
37270#[rustc_legacy_const_generics(2)]
37271pub unsafe fn _mm_sqrt_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
37272 static_assert_rounding!(ROUNDING);
37273 let a: f32x4 = a.as_f32x4();
37274 let b: f32x4 = b.as_f32x4();
37275 let zero: f32x4 = _mm_setzero_ps().as_f32x4();
    let r: f32x4 = vsqrtss(a, b, zero, 0b1, ROUNDING);
    transmute(r)
37278}
37279
37280/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
37281///
37282/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
37283/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
37284/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
37285/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
37286/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
37287/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
37288///
37289/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sqrt_round_ss&expand=5381)
37290#[inline]
37291#[target_feature(enable = "avx512f")]
37292#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37293#[cfg_attr(test, assert_instr(vsqrtss, ROUNDING = 8))]
37294#[rustc_legacy_const_generics(4)]
37295pub unsafe fn _mm_mask_sqrt_round_ss<const ROUNDING: i32>(
37296 src: __m128,
37297 k: __mmask8,
37298 a: __m128,
37299 b: __m128,
37300) -> __m128 {
37301 static_assert_rounding!(ROUNDING);
37302 let a: f32x4 = a.as_f32x4();
37303 let b: f32x4 = b.as_f32x4();
37304 let src: f32x4 = src.as_f32x4();
    let r: f32x4 = vsqrtss(a, b, src, k, ROUNDING);
    transmute(r)
37307}
37308
37309/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
37310///
37311/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
37312/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
37313/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
37314/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
37315/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
37316/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
37317///
37318/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sqrt_round_ss&expand=5382)
37319#[inline]
37320#[target_feature(enable = "avx512f")]
37321#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37322#[cfg_attr(test, assert_instr(vsqrtss, ROUNDING = 8))]
37323#[rustc_legacy_const_generics(3)]
37324pub unsafe fn _mm_maskz_sqrt_round_ss<const ROUNDING: i32>(
37325 k: __mmask8,
37326 a: __m128,
37327 b: __m128,
37328) -> __m128 {
37329 static_assert_rounding!(ROUNDING);
37330 let a: f32x4 = a.as_f32x4();
37331 let b: f32x4 = b.as_f32x4();
37332 let zero: f32x4 = _mm_setzero_ps().as_f32x4();
    let r: f32x4 = vsqrtss(a, b, zero, k, ROUNDING);
    transmute(r)
37335}
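
// Illustrative sketch (not part of the crate's API): contrast of writemask vs. zeromask
// behaviour for the scalar square root. When mask bit 0 is clear, the `mask` variant
// copies the lower lane from `src`, while the `maskz` variant zeroes it. The helper
// name and values are hypothetical; `avx512f` support is assumed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn sqrt_round_ss_mask_vs_maskz_sketch() -> (f32, f32) {
    let src = _mm_set_ss(7.0);
    let a = _mm_set_ss(0.0);
    let b = _mm_set_ss(16.0);
    let merged = _mm_mask_sqrt_round_ss::<_MM_FROUND_CUR_DIRECTION>(src, 0, a, b);
    let zeroed = _mm_maskz_sqrt_round_ss::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
    (_mm_cvtss_f32(merged), _mm_cvtss_f32(zeroed)) // (7.0, 0.0)
}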
37336
37337/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
37338///
37339/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
37340/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
37341/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
37342/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
37343/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
37344/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
37345///
37346/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_sqrt_round_sd&expand=5380)
37347#[inline]
37348#[target_feature(enable = "avx512f")]
37349#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37350#[cfg_attr(test, assert_instr(vsqrtsd, ROUNDING = 8))]
37351#[rustc_legacy_const_generics(2)]
37352pub unsafe fn _mm_sqrt_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
37353 static_assert_rounding!(ROUNDING);
37354 let a: f64x2 = a.as_f64x2();
37355 let b: f64x2 = b.as_f64x2();
37356 let zero: f64x2 = _mm_setzero_pd().as_f64x2();
    let r: f64x2 = vsqrtsd(a, b, zero, 0b1, ROUNDING);
    transmute(r)
37359}
37360
37361/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
37362///
37363/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
37364/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
37365/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
37366/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
37367/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
37368/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
37369///
37370/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sqrt_round_sd&expand=5378)
37371#[inline]
37372#[target_feature(enable = "avx512f")]
37373#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37374#[cfg_attr(test, assert_instr(vsqrtsd, ROUNDING = 8))]
37375#[rustc_legacy_const_generics(4)]
37376pub unsafe fn _mm_mask_sqrt_round_sd<const ROUNDING: i32>(
37377 src: __m128d,
37378 k: __mmask8,
37379 a: __m128d,
37380 b: __m128d,
37381) -> __m128d {
37382 static_assert_rounding!(ROUNDING);
37383 let a: f64x2 = a.as_f64x2();
37384 let b: f64x2 = b.as_f64x2();
37385 let src: f64x2 = src.as_f64x2();
    let r: f64x2 = vsqrtsd(a, b, src, k, ROUNDING);
    transmute(r)
37388}
37389
37390/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
37391///
37392/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
37393/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
37394/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
37395/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
37396/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
37397/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
37398///
37399/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sqrt_round_sd&expand=5379)
37400#[inline]
37401#[target_feature(enable = "avx512f")]
37402#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37403#[cfg_attr(test, assert_instr(vsqrtsd, ROUNDING = 8))]
37404#[rustc_legacy_const_generics(3)]
37405pub unsafe fn _mm_maskz_sqrt_round_sd<const ROUNDING: i32>(
37406 k: __mmask8,
37407 a: __m128d,
37408 b: __m128d,
37409) -> __m128d {
37410 static_assert_rounding!(ROUNDING);
37411 let a: f64x2 = a.as_f64x2();
37412 let b: f64x2 = b.as_f64x2();
37413 let zero: f64x2 = _mm_setzero_pd().as_f64x2();
    let r: f64x2 = vsqrtsd(a, b, zero, k, ROUNDING);
    transmute(r)
37416}
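
// Illustrative sketch (not part of the crate's API): composing an explicit rounding mode
// for the scalar square root. Per the docs above, the rounding constant must either be
// _MM_FROUND_CUR_DIRECTION or a directed mode OR-ed with _MM_FROUND_NO_EXC. The helper
// name is hypothetical; `avx512f` support is assumed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn sqrt_round_sd_sketch(a: __m128d, b: __m128d) -> __m128d {
    // Truncate (round toward zero) and suppress exceptions while taking the square root
    // of the lower lane of `b`; the upper lane is copied from `a`.
    _mm_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b)
}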
37417
37418/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
37419/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37420///
37421/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getexp_round_ss&expand=2856)
37422#[inline]
37423#[target_feature(enable = "avx512f")]
37424#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37425#[cfg_attr(test, assert_instr(vgetexpss, SAE = 8))]
37426#[rustc_legacy_const_generics(2)]
37427pub unsafe fn _mm_getexp_round_ss<const SAE: i32>(a: __m128, b: __m128) -> __m128 {
37428 static_assert_sae!(SAE);
37429 let a: f32x4 = a.as_f32x4();
37430 let b: f32x4 = b.as_f32x4();
37431 let zero: f32x4 = _mm_setzero_ps().as_f32x4();
    let r: f32x4 = vgetexpss(a, b, zero, 0b1, SAE);
    transmute(r)
37434}
37435
37436/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
37437/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37438///
37439/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getexp_round_ss&expand=2857)
37440#[inline]
37441#[target_feature(enable = "avx512f")]
37442#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37443#[cfg_attr(test, assert_instr(vgetexpss, SAE = 8))]
37444#[rustc_legacy_const_generics(4)]
37445pub unsafe fn _mm_mask_getexp_round_ss<const SAE: i32>(
37446 src: __m128,
37447 k: __mmask8,
37448 a: __m128,
37449 b: __m128,
37450) -> __m128 {
37451 static_assert_sae!(SAE);
37452 let a: f32x4 = a.as_f32x4();
37453 let b: f32x4 = b.as_f32x4();
37454 let src: f32x4 = src.as_f32x4();
    let r: f32x4 = vgetexpss(a, b, src, k, SAE);
    transmute(r)
37457}
37458
37459/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
37460/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37461///
37462/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getexp_round_ss&expand=2858)
37463#[inline]
37464#[target_feature(enable = "avx512f")]
37465#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37466#[cfg_attr(test, assert_instr(vgetexpss, SAE = 8))]
37467#[rustc_legacy_const_generics(3)]
37468pub unsafe fn _mm_maskz_getexp_round_ss<const SAE: i32>(
37469 k: __mmask8,
37470 a: __m128,
37471 b: __m128,
37472) -> __m128 {
37473 static_assert_sae!(SAE);
37474 let a: f32x4 = a.as_f32x4();
37475 let b: f32x4 = b.as_f32x4();
37476 let zero: f32x4 = _mm_setzero_ps().as_f32x4();
    let r: f32x4 = vgetexpss(a, b, zero, k, SAE);
    transmute(r)
37479}
37480
37481/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
37482/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37483///
37484/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getexp_round_sd&expand=2853)
37485#[inline]
37486#[target_feature(enable = "avx512f")]
37487#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37488#[cfg_attr(test, assert_instr(vgetexpsd, SAE = 8))]
37489#[rustc_legacy_const_generics(2)]
37490pub unsafe fn _mm_getexp_round_sd<const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
37491 static_assert_sae!(SAE);
37492 let a: f64x2 = a.as_f64x2();
37493 let b: f64x2 = b.as_f64x2();
37494 let zero: f64x2 = _mm_setzero_pd().as_f64x2();
    let r: f64x2 = vgetexpsd(a, b, zero, 0b1, SAE);
    transmute(r)
37497}
37498
37499/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
37500/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37501///
37502/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getexp_round_sd&expand=2854)
37503#[inline]
37504#[target_feature(enable = "avx512f")]
37505#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37506#[cfg_attr(test, assert_instr(vgetexpsd, SAE = 8))]
37507#[rustc_legacy_const_generics(4)]
37508pub unsafe fn _mm_mask_getexp_round_sd<const SAE: i32>(
37509 src: __m128d,
37510 k: __mmask8,
37511 a: __m128d,
37512 b: __m128d,
37513) -> __m128d {
37514 static_assert_sae!(SAE);
37515 let a: f64x2 = a.as_f64x2();
37516 let b: f64x2 = b.as_f64x2();
37517 let src: f64x2 = src.as_f64x2();
    let r: f64x2 = vgetexpsd(a, b, src, k, SAE);
    transmute(r)
37520}
37521
37522/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
37523/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37524///
37525/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getexp_round_sd&expand=2855)
37526#[inline]
37527#[target_feature(enable = "avx512f")]
37528#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37529#[cfg_attr(test, assert_instr(vgetexpsd, SAE = 8))]
37530#[rustc_legacy_const_generics(3)]
37531pub unsafe fn _mm_maskz_getexp_round_sd<const SAE: i32>(
37532 k: __mmask8,
37533 a: __m128d,
37534 b: __m128d,
37535) -> __m128d {
37536 static_assert_sae!(SAE);
37537 let a: f64x2 = a.as_f64x2();
37538 let b: f64x2 = b.as_f64x2();
37539 let zero: f64x2 = _mm_setzero_pd().as_f64x2();
    let r: f64x2 = vgetexpsd(a, b, zero, k, SAE);
    transmute(r)
37542}
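
// Illustrative sketch (not part of the crate's API): the getexp family effectively
// computes floor(log2(|x|)) of the lower lane of `b` and returns it as a float. The
// helper name and value are hypothetical; `avx512f` support is assumed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn getexp_round_ss_sketch() -> f32 {
    let a = _mm_set_ss(0.0);
    let b = _mm_set_ss(33.0);
    let r = _mm_getexp_round_ss::<_MM_FROUND_NO_EXC>(a, b);
    _mm_cvtss_f32(r) // 5.0, since 2^5 <= 33.0 < 2^6
}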
37543
37544/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
37545/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
37546/// _MM_MANT_NORM_1_2 // interval [1, 2)\
37547/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
37548/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
37549/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
37550/// The sign is determined by sc which can take the following values:\
37551/// _MM_MANT_SIGN_src // sign = sign(src)\
37552/// _MM_MANT_SIGN_zero // sign = 0\
37553/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
37554/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37555///
37556/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getmant_round_ss&expand=2892)
37557#[inline]
37558#[target_feature(enable = "avx512f")]
37559#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37560#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0, SAE = 4))]
37561#[rustc_legacy_const_generics(2, 3, 4)]
37562pub unsafe fn _mm_getmant_round_ss<
37563 const NORM: _MM_MANTISSA_NORM_ENUM,
37564 const SIGN: _MM_MANTISSA_SIGN_ENUM,
37565 const SAE: i32,
37566>(
37567 a: __m128,
37568 b: __m128,
37569) -> __m128 {
37570 static_assert_uimm_bits!(NORM, 4);
37571 static_assert_uimm_bits!(SIGN, 2);
37572 static_assert_mantissas_sae!(SAE);
37573 let a: f32x4 = a.as_f32x4();
37574 let b: f32x4 = b.as_f32x4();
37575 let zero: f32x4 = _mm_setzero_ps().as_f32x4();
    let r: f32x4 = vgetmantss(a, b, SIGN << 2 | NORM, zero, 0b1, SAE);
    transmute(r)
37578}
37579
37580/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
37581/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
37582/// _MM_MANT_NORM_1_2 // interval [1, 2)\
37583/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
37584/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
37585/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
37586/// The sign is determined by sc which can take the following values:\
37587/// _MM_MANT_SIGN_src // sign = sign(src)\
37588/// _MM_MANT_SIGN_zero // sign = 0\
37589/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
37590/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37591///
37592/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getmant_round_ss&expand=2893)
37593#[inline]
37594#[target_feature(enable = "avx512f")]
37595#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37596#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0, SAE = 4))]
37597#[rustc_legacy_const_generics(4, 5, 6)]
37598pub unsafe fn _mm_mask_getmant_round_ss<
37599 const NORM: _MM_MANTISSA_NORM_ENUM,
37600 const SIGN: _MM_MANTISSA_SIGN_ENUM,
37601 const SAE: i32,
37602>(
37603 src: __m128,
37604 k: __mmask8,
37605 a: __m128,
37606 b: __m128,
37607) -> __m128 {
37608 static_assert_uimm_bits!(NORM, 4);
37609 static_assert_uimm_bits!(SIGN, 2);
37610 static_assert_mantissas_sae!(SAE);
37611 let a: f32x4 = a.as_f32x4();
37612 let b: f32x4 = b.as_f32x4();
37613 let src: f32x4 = src.as_f32x4();
    let r: f32x4 = vgetmantss(a, b, SIGN << 2 | NORM, src, k, SAE);
    transmute(r)
37616}
37617
37618/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
37619/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
37620/// _MM_MANT_NORM_1_2 // interval [1, 2)\
37621/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
37622/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
37623/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
37624/// The sign is determined by sc which can take the following values:\
37625/// _MM_MANT_SIGN_src // sign = sign(src)\
37626/// _MM_MANT_SIGN_zero // sign = 0\
37627/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
37628/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37629///
37630/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getmant_round_ss&expand=2894)
37631#[inline]
37632#[target_feature(enable = "avx512f")]
37633#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37634#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0, SAE = 4))]
37635#[rustc_legacy_const_generics(3, 4, 5)]
37636pub unsafe fn _mm_maskz_getmant_round_ss<
37637 const NORM: _MM_MANTISSA_NORM_ENUM,
37638 const SIGN: _MM_MANTISSA_SIGN_ENUM,
37639 const SAE: i32,
37640>(
37641 k: __mmask8,
37642 a: __m128,
37643 b: __m128,
37644) -> __m128 {
37645 static_assert_uimm_bits!(NORM, 4);
37646 static_assert_uimm_bits!(SIGN, 2);
37647 static_assert_mantissas_sae!(SAE);
37648 let a: f32x4 = a.as_f32x4();
37649 let b: f32x4 = b.as_f32x4();
37650 let zero: f32x4 = _mm_setzero_ps().as_f32x4();
    let r: f32x4 = vgetmantss(a, b, SIGN << 2 | NORM, zero, k, SAE);
    transmute(r)
37653}
37654
37655/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
37656/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
37657/// _MM_MANT_NORM_1_2 // interval [1, 2)\
37658/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
37659/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
37660/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
37661/// The sign is determined by sc which can take the following values:\
37662/// _MM_MANT_SIGN_src // sign = sign(src)\
37663/// _MM_MANT_SIGN_zero // sign = 0\
37664/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
37665/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37666///
37667/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getmant_round_sd&expand=2889)
37668#[inline]
37669#[target_feature(enable = "avx512f")]
37670#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37671#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0, SAE = 4))]
37672#[rustc_legacy_const_generics(2, 3, 4)]
37673pub unsafe fn _mm_getmant_round_sd<
37674 const NORM: _MM_MANTISSA_NORM_ENUM,
37675 const SIGN: _MM_MANTISSA_SIGN_ENUM,
37676 const SAE: i32,
37677>(
37678 a: __m128d,
37679 b: __m128d,
37680) -> __m128d {
37681 static_assert_uimm_bits!(NORM, 4);
37682 static_assert_uimm_bits!(SIGN, 2);
37683 static_assert_mantissas_sae!(SAE);
37684 let a: f64x2 = a.as_f64x2();
37685 let b: f64x2 = b.as_f64x2();
37686 let zero: f64x2 = _mm_setzero_pd().as_f64x2();
    let r: f64x2 = vgetmantsd(a, b, SIGN << 2 | NORM, zero, 0b1, SAE);
    transmute(r)
37689}
37690
37691/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
37692/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
37693/// _MM_MANT_NORM_1_2 // interval [1, 2)\
37694/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
37695/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
37696/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
37697/// The sign is determined by sc which can take the following values:\
37698/// _MM_MANT_SIGN_src // sign = sign(src)\
37699/// _MM_MANT_SIGN_zero // sign = 0\
37700/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
37701/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37702///
37703/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getmant_round_sd&expand=2890)
37704#[inline]
37705#[target_feature(enable = "avx512f")]
37706#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37707#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0, SAE = 4))]
37708#[rustc_legacy_const_generics(4, 5, 6)]
37709pub unsafe fn _mm_mask_getmant_round_sd<
37710 const NORM: _MM_MANTISSA_NORM_ENUM,
37711 const SIGN: _MM_MANTISSA_SIGN_ENUM,
37712 const SAE: i32,
37713>(
37714 src: __m128d,
37715 k: __mmask8,
37716 a: __m128d,
37717 b: __m128d,
37718) -> __m128d {
37719 static_assert_uimm_bits!(NORM, 4);
37720 static_assert_uimm_bits!(SIGN, 2);
37721 static_assert_mantissas_sae!(SAE);
37722 let a: f64x2 = a.as_f64x2();
37723 let b: f64x2 = b.as_f64x2();
37724 let src: f64x2 = src.as_f64x2();
    let r: f64x2 = vgetmantsd(a, b, SIGN << 2 | NORM, src, k, SAE);
    transmute(r)
37727}
37728
37729/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
37730/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
37731/// _MM_MANT_NORM_1_2 // interval [1, 2)\
37732/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
37733/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
37734/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
37735/// The sign is determined by sc which can take the following values:\
37736/// _MM_MANT_SIGN_src // sign = sign(src)\
37737/// _MM_MANT_SIGN_zero // sign = 0\
37738/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
37739/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37740///
37741/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getmant_round_sd&expand=2891)
37742#[inline]
37743#[target_feature(enable = "avx512f")]
37744#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37745#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0, SAE = 4))]
37746#[rustc_legacy_const_generics(3, 4, 5)]
37747pub unsafe fn _mm_maskz_getmant_round_sd<
37748 const NORM: _MM_MANTISSA_NORM_ENUM,
37749 const SIGN: _MM_MANTISSA_SIGN_ENUM,
37750 const SAE: i32,
37751>(
37752 k: __mmask8,
37753 a: __m128d,
37754 b: __m128d,
37755) -> __m128d {
37756 static_assert_uimm_bits!(NORM, 4);
37757 static_assert_uimm_bits!(SIGN, 2);
37758 static_assert_mantissas_sae!(SAE);
37759 let a: f64x2 = a.as_f64x2();
37760 let b: f64x2 = b.as_f64x2();
37761 let zero: f64x2 = _mm_setzero_pd().as_f64x2();
    let r: f64x2 = vgetmantsd(a, b, SIGN << 2 | NORM, zero, k, SAE);
    transmute(r)
37764}
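
// Illustrative sketch (not part of the crate's API): normalizing a mantissa into [1, 2)
// with the sign taken from the source. The literal const arguments 0 and 0 are assumed
// to correspond to the documented _MM_MANT_NORM_1_2 and _MM_MANT_SIGN_src choices; the
// helper name and input are hypothetical, and `avx512f` support is assumed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn getmant_round_ss_sketch() -> f32 {
    let a = _mm_set_ss(0.0);
    let b = _mm_set_ss(10.0);
    // 10.0 = 1.25 * 2^3, so the mantissa normalized into [1, 2) is 1.25.
    let r = _mm_getmant_round_ss::<0, 0, _MM_FROUND_NO_EXC>(a, b);
    _mm_cvtss_f32(r) // 1.25
}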
37765
37766/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
37767/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
37768/// _MM_FROUND_TO_NEAREST_INT // round to nearest\
37769/// _MM_FROUND_TO_NEG_INF // round down\
37770/// _MM_FROUND_TO_POS_INF // round up\
37771/// _MM_FROUND_TO_ZERO // truncate\
37772/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE\
37773///
37774/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37775/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_roundscale_round_ss&expand=4796)
37776#[inline]
37777#[target_feature(enable = "avx512f")]
37778#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37779#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0, SAE = 8))]
37780#[rustc_legacy_const_generics(2, 3)]
37781pub unsafe fn _mm_roundscale_round_ss<const IMM8: i32, const SAE: i32>(
37782 a: __m128,
37783 b: __m128,
37784) -> __m128 {
37785 static_assert_uimm_bits!(IMM8, 8);
37786 static_assert_mantissas_sae!(SAE);
37787 let a: f32x4 = a.as_f32x4();
37788 let b: f32x4 = b.as_f32x4();
37789 let zero: f32x4 = _mm_setzero_ps().as_f32x4();
    let r: f32x4 = vrndscaless(a, b, zero, 0b11111111, IMM8, SAE);
    transmute(r)
37792}
37793
37794/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
37795/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
37796/// _MM_FROUND_TO_NEAREST_INT // round to nearest\
37797/// _MM_FROUND_TO_NEG_INF // round down\
37798/// _MM_FROUND_TO_POS_INF // round up\
37799/// _MM_FROUND_TO_ZERO // truncate\
37800/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE\
37801///
37802/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37803/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_roundscale_round_ss&expand=4794)
37804#[inline]
37805#[target_feature(enable = "avx512f")]
37806#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37807#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0, SAE = 8))]
37808#[rustc_legacy_const_generics(4, 5)]
37809pub unsafe fn _mm_mask_roundscale_round_ss<const IMM8: i32, const SAE: i32>(
37810 src: __m128,
37811 k: __mmask8,
37812 a: __m128,
37813 b: __m128,
37814) -> __m128 {
37815 static_assert_uimm_bits!(IMM8, 8);
37816 static_assert_mantissas_sae!(SAE);
37817 let a: f32x4 = a.as_f32x4();
37818 let b: f32x4 = b.as_f32x4();
37819 let src: f32x4 = src.as_f32x4();
    let r: f32x4 = vrndscaless(a, b, src, k, IMM8, SAE);
    transmute(r)
37822}
37823
37824/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
37825/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
37826/// _MM_FROUND_TO_NEAREST_INT // round to nearest\
37827/// _MM_FROUND_TO_NEG_INF // round down\
37828/// _MM_FROUND_TO_POS_INF // round up\
37829/// _MM_FROUND_TO_ZERO // truncate\
37830/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE\
37831///
37832/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37833/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_roundscale_round_ss&expand=4795)
37834#[inline]
37835#[target_feature(enable = "avx512f")]
37836#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37837#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0, SAE = 8))]
37838#[rustc_legacy_const_generics(3, 4)]
37839pub unsafe fn _mm_maskz_roundscale_round_ss<const IMM8: i32, const SAE: i32>(
37840 k: __mmask8,
37841 a: __m128,
37842 b: __m128,
37843) -> __m128 {
37844 static_assert_uimm_bits!(IMM8, 8);
37845 static_assert_mantissas_sae!(SAE);
37846 let a: f32x4 = a.as_f32x4();
37847 let b: f32x4 = b.as_f32x4();
37848 let zero: f32x4 = _mm_setzero_ps().as_f32x4();
    let r: f32x4 = vrndscaless(a, b, zero, k, IMM8, SAE);
    transmute(r)
37851}
37852
37853/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
37854/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
37855/// _MM_FROUND_TO_NEAREST_INT // round to nearest\
37856/// _MM_FROUND_TO_NEG_INF // round down\
37857/// _MM_FROUND_TO_POS_INF // round up\
37858/// _MM_FROUND_TO_ZERO // truncate\
37859/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE\
37860///
37861/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37862/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_roundscale_round_sd&expand=4793)
37863#[inline]
37864#[target_feature(enable = "avx512f")]
37865#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37866#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0, SAE = 8))]
37867#[rustc_legacy_const_generics(2, 3)]
37868pub unsafe fn _mm_roundscale_round_sd<const IMM8: i32, const SAE: i32>(
37869 a: __m128d,
37870 b: __m128d,
37871) -> __m128d {
37872 static_assert_uimm_bits!(IMM8, 8);
37873 static_assert_mantissas_sae!(SAE);
37874 let a: f64x2 = a.as_f64x2();
37875 let b: f64x2 = b.as_f64x2();
37876 let zero: f64x2 = _mm_setzero_pd().as_f64x2();
    let r: f64x2 = vrndscalesd(a, b, zero, 0b11111111, IMM8, SAE);
    transmute(r)
37879}
37880
37881/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
37882/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
37883/// _MM_FROUND_TO_NEAREST_INT // round to nearest\
37884/// _MM_FROUND_TO_NEG_INF // round down\
37885/// _MM_FROUND_TO_POS_INF // round up\
37886/// _MM_FROUND_TO_ZERO // truncate\
37887/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE\
37888///
37889/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37890/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_roundscale_round_sd&expand=4791)
37891#[inline]
37892#[target_feature(enable = "avx512f")]
37893#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37894#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0, SAE = 8))]
37895#[rustc_legacy_const_generics(4, 5)]
37896pub unsafe fn _mm_mask_roundscale_round_sd<const IMM8: i32, const SAE: i32>(
37897 src: __m128d,
37898 k: __mmask8,
37899 a: __m128d,
37900 b: __m128d,
37901) -> __m128d {
37902 static_assert_uimm_bits!(IMM8, 8);
37903 static_assert_mantissas_sae!(SAE);
37904 let a: f64x2 = a.as_f64x2();
37905 let b: f64x2 = b.as_f64x2();
37906 let src: f64x2 = src.as_f64x2();
    let r: f64x2 = vrndscalesd(a, b, src, k, IMM8, SAE);
    transmute(r)
37909}
37910
37911/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
37912/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
37913/// _MM_FROUND_TO_NEAREST_INT // round to nearest\
37914/// _MM_FROUND_TO_NEG_INF // round down\
37915/// _MM_FROUND_TO_POS_INF // round up\
37916/// _MM_FROUND_TO_ZERO // truncate\
37917/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE\
37918///
37919/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37920/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_roundscale_round_sd&expand=4792)
37921#[inline]
37922#[target_feature(enable = "avx512f")]
37923#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37924#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0, SAE = 8))]
37925#[rustc_legacy_const_generics(3, 4)]
37926pub unsafe fn _mm_maskz_roundscale_round_sd<const IMM8: i32, const SAE: i32>(
37927 k: __mmask8,
37928 a: __m128d,
37929 b: __m128d,
37930) -> __m128d {
37931 static_assert_uimm_bits!(IMM8, 8);
37932 static_assert_mantissas_sae!(SAE);
37933 let a: f64x2 = a.as_f64x2();
37934 let b: f64x2 = b.as_f64x2();
37935 let zero: f64x2 = _mm_setzero_pd().as_f64x2();
    let r: f64x2 = vrndscalesd(a, b, zero, k, IMM8, SAE);
    transmute(r)
37938}
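
// Illustrative sketch (not part of the crate's API): for the roundscale family the low
// bits of IMM8 select the rounding mode and the high nibble the number of fraction bits
// M to keep, so the result is rounded to a multiple of 2^-M. The helper name and values
// are hypothetical; `avx512f` support is assumed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn roundscale_round_ss_sketch() -> f32 {
    let a = _mm_set_ss(0.0);
    let b = _mm_set_ss(2.3);
    // IMM8 = 0b0001_0000: keep one fraction bit (M = 1), round to nearest.
    let r = _mm_roundscale_round_ss::<0b0001_0000, _MM_FROUND_NO_EXC>(a, b);
    _mm_cvtss_f32(r) // 2.5, the nearest multiple of 0.5
}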
37939
37940/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
37941///
37942/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
37943/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
37944/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
37945/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
37946/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
37947/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
37948///
37949/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_scalef_round_ss&expand=4895)
37950#[inline]
37951#[target_feature(enable = "avx512f")]
37952#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37953#[cfg_attr(test, assert_instr(vscalefss, ROUNDING = 8))]
37954#[rustc_legacy_const_generics(2)]
37955pub unsafe fn _mm_scalef_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
37956 static_assert_rounding!(ROUNDING);
37957 let a: f32x4 = a.as_f32x4();
37958 let b: f32x4 = b.as_f32x4();
37959 let zero: f32x4 = _mm_setzero_ps().as_f32x4();
    let r: f32x4 = vscalefss(a, b, zero, 0b11111111, ROUNDING);
    transmute(r)
37962}
37963
37964/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
37965///
37966/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
37967/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
37968/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
37969/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
37970/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
37971/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
37972///
37973/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_scalef_round_ss&expand=4893)
37974#[inline]
37975#[target_feature(enable = "avx512f")]
37976#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37977#[cfg_attr(test, assert_instr(vscalefss, ROUNDING = 8))]
37978#[rustc_legacy_const_generics(4)]
37979pub unsafe fn _mm_mask_scalef_round_ss<const ROUNDING: i32>(
37980 src: __m128,
37981 k: __mmask8,
37982 a: __m128,
37983 b: __m128,
37984) -> __m128 {
37985 static_assert_rounding!(ROUNDING);
37986 let a: f32x4 = a.as_f32x4();
37987 let b: f32x4 = b.as_f32x4();
37988 let src: f32x4 = src.as_f32x4();
    let r: f32x4 = vscalefss(a, b, src, k, ROUNDING);
    transmute(r)
37991}
37992
37993/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
37994///
37995/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
37996/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
37997/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
37998/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
37999/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
38000/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
38001///
38002/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_scalef_round_ss&expand=4894)
38003#[inline]
38004#[target_feature(enable = "avx512f")]
38005#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38006#[cfg_attr(test, assert_instr(vscalefss, ROUNDING = 8))]
38007#[rustc_legacy_const_generics(3)]
38008pub unsafe fn _mm_maskz_scalef_round_ss<const ROUNDING: i32>(
38009 k: __mmask8,
38010 a: __m128,
38011 b: __m128,
38012) -> __m128 {
38013 static_assert_rounding!(ROUNDING);
38014 let a: f32x4 = a.as_f32x4();
38015 let b: f32x4 = b.as_f32x4();
38016 let zero: f32x4 = _mm_setzero_ps().as_f32x4();
    let r: f32x4 = vscalefss(a, b, zero, k, ROUNDING);
    transmute(r)
38019}
38020
38021/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
38022///
38023/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38024/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
38025/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
38026/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
38027/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
38028/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
38029///
38030/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_scalef_round_sd&expand=4892)
38031#[inline]
38032#[target_feature(enable = "avx512f")]
38033#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38034#[cfg_attr(test, assert_instr(vscalefsd, ROUNDING = 8))]
38035#[rustc_legacy_const_generics(2)]
38036pub unsafe fn _mm_scalef_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
38037 static_assert_rounding!(ROUNDING);
38038 let a: f64x2 = a.as_f64x2();
38039 let b: f64x2 = b.as_f64x2();
38040 let zero: f64x2 = _mm_setzero_pd().as_f64x2();
    let r: f64x2 = vscalefsd(a, b, zero, 0b11111111, ROUNDING);
    transmute(r)
38043}
38044
38045/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38046///
38047/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38048/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
38049/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
38050/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
38051/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
38052/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
38053///
38054/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_scalef_round_sd&expand=4890)
38055#[inline]
38056#[target_feature(enable = "avx512f")]
38057#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38058#[cfg_attr(test, assert_instr(vscalefsd, ROUNDING = 8))]
38059#[rustc_legacy_const_generics(4)]
38060pub unsafe fn _mm_mask_scalef_round_sd<const ROUNDING: i32>(
38061 src: __m128d,
38062 k: __mmask8,
38063 a: __m128d,
38064 b: __m128d,
38065) -> __m128d {
    static_assert_rounding!(ROUNDING);
    let a: f64x2 = a.as_f64x2();
38067 let b: f64x2 = b.as_f64x2();
38068 let src: f64x2 = src.as_f64x2();
    let r: f64x2 = vscalefsd(a, b, src, k, ROUNDING);
    transmute(r)
38071}
38072
38073/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38074///
38075/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38076/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
38077/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
38078/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
38079/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
38080/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
38081///
38082/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_scalef_round_sd&expand=4891)
38083#[inline]
38084#[target_feature(enable = "avx512f")]
38085#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38086#[cfg_attr(test, assert_instr(vscalefsd, ROUNDING = 8))]
38087#[rustc_legacy_const_generics(3)]
38088pub unsafe fn _mm_maskz_scalef_round_sd<const ROUNDING: i32>(
38089 k: __mmask8,
38090 a: __m128d,
38091 b: __m128d,
38092) -> __m128d {
38093 static_assert_rounding!(ROUNDING);
38094 let a: f64x2 = a.as_f64x2();
38095 let b: f64x2 = b.as_f64x2();
38096 let zero: f64x2 = _mm_setzero_pd().as_f64x2();
    let r: f64x2 = vscalefsd(a, b, zero, k, ROUNDING);
    transmute(r)
38099}
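
// Illustrative sketch (not part of the crate's API): the scalef family computes
// a * 2^floor(b) in the lower lane, which gives ldexp-style scaling without first
// converting the exponent to an integer. The helper name and values are hypothetical;
// `avx512f` support is assumed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn scalef_round_sd_sketch() -> f64 {
    let a = _mm_set_sd(3.0);
    let b = _mm_set_sd(4.0);
    let r = _mm_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
    _mm_cvtsd_f64(r) // 48.0 == 3.0 * 2^4
}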
38100
38101/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
38102///
38103/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38104/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
38105/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
38106/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
38107/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
38108/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
38109///
38110/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fmadd_round_ss&expand=2573)
38111#[inline]
38112#[target_feature(enable = "avx512f")]
38113#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38114#[cfg_attr(test, assert_instr(vfmadd213ss, ROUNDING = 8))]
38115#[rustc_legacy_const_generics(3)]
38116pub unsafe fn _mm_fmadd_round_ss<const ROUNDING: i32>(a: __m128, b: __m128, c: __m128) -> __m128 {
38117 static_assert_rounding!(ROUNDING);
38118 let extracta: f32 = simd_extract!(a, 0);
38119 let extractb: f32 = simd_extract!(b, 0);
38120 let extractc: f32 = simd_extract!(c, 0);
    let r: f32 = vfmadd132ss(extracta, extractb, extractc, ROUNDING);
38122 simd_insert!(a, 0, r)
38123}
38124
38125/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38126///
38127/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38128/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
38129/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
38130/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
38131/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
38132/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
38133///
38134/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmadd_round_ss&expand=2574)
38135#[inline]
38136#[target_feature(enable = "avx512f")]
38137#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38138#[cfg_attr(test, assert_instr(vfmadd213ss, ROUNDING = 8))]
38139#[rustc_legacy_const_generics(4)]
38140pub unsafe fn _mm_mask_fmadd_round_ss<const ROUNDING: i32>(
38141 a: __m128,
38142 k: __mmask8,
38143 b: __m128,
38144 c: __m128,
38145) -> __m128 {
38146 static_assert_rounding!(ROUNDING);
38147 let mut fmadd: f32 = simd_extract!(a, 0);
38148 if (k & 0b00000001) != 0 {
38149 let extractb: f32 = simd_extract!(b, 0);
38150 let extractc: f32 = simd_extract!(c, 0);
        fmadd = vfmadd132ss(fmadd, extractb, extractc, ROUNDING);
38152 }
38153 simd_insert!(a, 0, fmadd)
38154}
38155
38156/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38157///
38158/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38159/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
38160/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
38161/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
38162/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
38163/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
38164///
38165/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmadd_round_ss&expand=2576)
38166#[inline]
38167#[target_feature(enable = "avx512f")]
38168#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38169#[cfg_attr(test, assert_instr(vfmadd213ss, ROUNDING = 8))]
38170#[rustc_legacy_const_generics(4)]
38171pub unsafe fn _mm_maskz_fmadd_round_ss<const ROUNDING: i32>(
38172 k: __mmask8,
38173 a: __m128,
38174 b: __m128,
38175 c: __m128,
38176) -> __m128 {
38177 static_assert_rounding!(ROUNDING);
38178 let mut fmadd: f32 = 0.;
38179 if (k & 0b00000001) != 0 {
38180 let extracta: f32 = simd_extract!(a, 0);
38181 let extractb: f32 = simd_extract!(b, 0);
38182 let extractc: f32 = simd_extract!(c, 0);
        fmadd = vfmadd132ss(extracta, extractb, extractc, ROUNDING);
38184 }
38185 simd_insert!(a, 0, fmadd)
38186}
38187
38188/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
38189///
38190/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38191/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
38192/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
38193/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
38194/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
38195/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
38196///
38197/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmadd_round_ss&expand=2575)
38198#[inline]
38199#[target_feature(enable = "avx512f")]
38200#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38201#[cfg_attr(test, assert_instr(vfmadd213ss, ROUNDING = 8))]
38202#[rustc_legacy_const_generics(4)]
38203pub unsafe fn _mm_mask3_fmadd_round_ss<const ROUNDING: i32>(
38204 a: __m128,
38205 b: __m128,
38206 c: __m128,
38207 k: __mmask8,
38208) -> __m128 {
38209 static_assert_rounding!(ROUNDING);
38210 let mut fmadd: f32 = simd_extract!(c, 0);
38211 if (k & 0b00000001) != 0 {
38212 let extracta: f32 = simd_extract!(a, 0);
38213 let extractb: f32 = simd_extract!(b, 0);
        fmadd = vfmadd132ss(extracta, extractb, fmadd, ROUNDING);
38215 }
38216 simd_insert!(c, 0, fmadd)
38217}
38218
38219/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
38220///
38221/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38222/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
38223/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
38224/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
38225/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
38226/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
38227///
38228/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fmadd_round_sd&expand=2569)
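///
/// # Examples
///
/// A minimal usage sketch (not part of Intel's documentation), assuming a nightly
/// toolchain with `stdarch_x86_avx512` and an `avx512f`-capable CPU, hence `ignore`:
///
/// ```ignore
/// unsafe {
///     let a = _mm_set_sd(2.0);
///     let b = _mm_set_sd(3.0);
///     let c = _mm_set_sd(4.0);
///     // Lower lane: 2.0 * 3.0 + 4.0 = 10.0; the upper lane is copied from `a`.
///     let r = _mm_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
///     assert_eq!(_mm_cvtsd_f64(r), 10.0);
/// }
/// ```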
38229#[inline]
38230#[target_feature(enable = "avx512f")]
38231#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38232#[cfg_attr(test, assert_instr(vfmadd213sd, ROUNDING = 8))]
38233#[rustc_legacy_const_generics(3)]
38234pub unsafe fn _mm_fmadd_round_sd<const ROUNDING: i32>(
38235 a: __m128d,
38236 b: __m128d,
38237 c: __m128d,
38238) -> __m128d {
38239 static_assert_rounding!(ROUNDING);
38240 let extracta: f64 = simd_extract!(a, 0);
38241 let extractb: f64 = simd_extract!(b, 0);
38242 let extractc: f64 = simd_extract!(c, 0);
    let fmadd: f64 = vfmadd132sd(extracta, extractb, extractc, ROUNDING);
38244 simd_insert!(a, 0, fmadd)
38245}
38246
38247/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38248///
38249/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38250/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
38251/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
38252/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
38253/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
38254/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
38255///
38256/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmadd_round_sd&expand=2570)
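///
/// # Examples
///
/// An illustrative sketch of the writemask behaviour (assumes a nightly toolchain
/// with `stdarch_x86_avx512` and an `avx512f`-capable CPU, hence `ignore`):
///
/// ```ignore
/// unsafe {
///     let a = _mm_set_sd(2.0);
///     let b = _mm_set_sd(3.0);
///     let c = _mm_set_sd(4.0);
///     // Mask bit 0 set: lower lane = 2.0 * 3.0 + 4.0 = 10.0
///     let r = _mm_mask_fmadd_round_sd::<{ _MM_FROUND_CUR_DIRECTION }>(a, 0b1, b, c);
///     assert_eq!(_mm_cvtsd_f64(r), 10.0);
///     // Mask bit 0 clear: the lower lane keeps the value from `a`
///     let s = _mm_mask_fmadd_round_sd::<{ _MM_FROUND_CUR_DIRECTION }>(a, 0b0, b, c);
///     assert_eq!(_mm_cvtsd_f64(s), 2.0);
/// }
/// ```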
38257#[inline]
38258#[target_feature(enable = "avx512f")]
38259#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38260#[cfg_attr(test, assert_instr(vfmadd213sd, ROUNDING = 8))]
38261#[rustc_legacy_const_generics(4)]
38262pub unsafe fn _mm_mask_fmadd_round_sd<const ROUNDING: i32>(
38263 a: __m128d,
38264 k: __mmask8,
38265 b: __m128d,
38266 c: __m128d,
38267) -> __m128d {
38268 static_assert_rounding!(ROUNDING);
38269 let mut fmadd: f64 = simd_extract!(a, 0);
38270 if (k & 0b00000001) != 0 {
38271 let extractb: f64 = simd_extract!(b, 0);
38272 let extractc: f64 = simd_extract!(c, 0);
        fmadd = vfmadd132sd(fmadd, extractb, extractc, ROUNDING);
38274 }
38275 simd_insert!(a, 0, fmadd)
38276}
38277
38278/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38279///
38280/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38281/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
38282/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
38283/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
38284/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
38285/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
38286///
38287/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmadd_round_sd&expand=2572)
38288#[inline]
38289#[target_feature(enable = "avx512f")]
38290#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38291#[cfg_attr(test, assert_instr(vfmadd213sd, ROUNDING = 8))]
38292#[rustc_legacy_const_generics(4)]
38293pub unsafe fn _mm_maskz_fmadd_round_sd<const ROUNDING: i32>(
38294 k: __mmask8,
38295 a: __m128d,
38296 b: __m128d,
38297 c: __m128d,
38298) -> __m128d {
38299 static_assert_rounding!(ROUNDING);
38300 let mut fmadd: f64 = 0.;
38301 if (k & 0b00000001) != 0 {
38302 let extracta: f64 = simd_extract!(a, 0);
38303 let extractb: f64 = simd_extract!(b, 0);
38304 let extractc: f64 = simd_extract!(c, 0);
        fmadd = vfmadd132sd(extracta, extractb, extractc, ROUNDING);
38306 }
38307 simd_insert!(a, 0, fmadd)
38308}
38309
38310/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
38311///
38312/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38313/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
38314/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
38315/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
38316/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
38317/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
38318///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmadd_round_sd&expand=2571)
38320#[inline]
38321#[target_feature(enable = "avx512f")]
38322#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38323#[cfg_attr(test, assert_instr(vfmadd213sd, ROUNDING = 8))]
38324#[rustc_legacy_const_generics(4)]
38325pub unsafe fn _mm_mask3_fmadd_round_sd<const ROUNDING: i32>(
38326 a: __m128d,
38327 b: __m128d,
38328 c: __m128d,
38329 k: __mmask8,
38330) -> __m128d {
38331 static_assert_rounding!(ROUNDING);
38332 let mut fmadd: f64 = simd_extract!(c, 0);
38333 if (k & 0b00000001) != 0 {
38334 let extracta: f64 = simd_extract!(a, 0);
38335 let extractb: f64 = simd_extract!(b, 0);
        fmadd = vfmadd132sd(extracta, extractb, fmadd, ROUNDING);
38337 }
38338 simd_insert!(c, 0, fmadd)
38339}
38340
38341/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
38342///
38343/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38344/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
38345/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
38346/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
38347/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
38348/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
38349///
38350/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fmsub_round_ss&expand=2659)
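///
/// # Examples
///
/// A minimal usage sketch (not from Intel's documentation; assumes a nightly
/// toolchain with `stdarch_x86_avx512` and an `avx512f`-capable CPU, hence `ignore`):
///
/// ```ignore
/// unsafe {
///     let a = _mm_set_ss(2.0);
///     let b = _mm_set_ss(3.0);
///     let c = _mm_set_ss(4.0);
///     // Lower lane: 2.0 * 3.0 - 4.0 = 2.0
///     let r = _mm_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
///     assert_eq!(_mm_cvtss_f32(r), 2.0);
/// }
/// ```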
38351#[inline]
38352#[target_feature(enable = "avx512f")]
38353#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38354#[cfg_attr(test, assert_instr(vfmsub213ss, ROUNDING = 8))]
38355#[rustc_legacy_const_generics(3)]
38356pub unsafe fn _mm_fmsub_round_ss<const ROUNDING: i32>(a: __m128, b: __m128, c: __m128) -> __m128 {
38357 static_assert_rounding!(ROUNDING);
38358 let extracta: f32 = simd_extract!(a, 0);
38359 let extractb: f32 = simd_extract!(b, 0);
38360 let extractc: f32 = simd_extract!(c, 0);
38361 let extractc: f32 = -extractc;
    let fmsub: f32 = vfmadd132ss(extracta, extractb, extractc, ROUNDING);
38363 simd_insert!(a, 0, fmsub)
38364}
38365
38366/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38367///
38368/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38369/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
38370/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
38371/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
38372/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
38373/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
38374///
38375/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmsub_round_ss&expand=2660)
38376#[inline]
38377#[target_feature(enable = "avx512f")]
38378#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38379#[cfg_attr(test, assert_instr(vfmsub213ss, ROUNDING = 8))]
38380#[rustc_legacy_const_generics(4)]
38381pub unsafe fn _mm_mask_fmsub_round_ss<const ROUNDING: i32>(
38382 a: __m128,
38383 k: __mmask8,
38384 b: __m128,
38385 c: __m128,
38386) -> __m128 {
38387 static_assert_rounding!(ROUNDING);
38388 let mut fmsub: f32 = simd_extract!(a, 0);
38389 if (k & 0b00000001) != 0 {
38390 let extractb: f32 = simd_extract!(b, 0);
38391 let extractc: f32 = simd_extract!(c, 0);
38392 let extractc: f32 = -extractc;
        fmsub = vfmadd132ss(fmsub, extractb, extractc, ROUNDING);
38394 }
38395 simd_insert!(a, 0, fmsub)
38396}
38397
38398/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38399///
38400/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38401/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
38402/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
38403/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
38404/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
38405/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
38406///
38407/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmsub_round_ss&expand=2662)
38408#[inline]
38409#[target_feature(enable = "avx512f")]
38410#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38411#[cfg_attr(test, assert_instr(vfmsub213ss, ROUNDING = 8))]
38412#[rustc_legacy_const_generics(4)]
38413pub unsafe fn _mm_maskz_fmsub_round_ss<const ROUNDING: i32>(
38414 k: __mmask8,
38415 a: __m128,
38416 b: __m128,
38417 c: __m128,
38418) -> __m128 {
38419 static_assert_rounding!(ROUNDING);
38420 let mut fmsub: f32 = 0.;
38421 if (k & 0b00000001) != 0 {
38422 let extracta: f32 = simd_extract!(a, 0);
38423 let extractb: f32 = simd_extract!(b, 0);
38424 let extractc: f32 = simd_extract!(c, 0);
38425 let extractc: f32 = -extractc;
        fmsub = vfmadd132ss(extracta, extractb, extractc, ROUNDING);
38427 }
38428 simd_insert!(a, 0, fmsub)
38429}
38430
38431/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
38432///
38433/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38434/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
38435/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
38436/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
38437/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
38438/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
38439///
38440/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmsub_round_ss&expand=2661)
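///
/// # Examples
///
/// An illustrative sketch of the mask3 form, where the result is merged into `c`
/// (assumes a nightly toolchain with `stdarch_x86_avx512` and an `avx512f`-capable
/// CPU, hence `ignore`):
///
/// ```ignore
/// unsafe {
///     let a = _mm_set_ss(2.0);
///     let b = _mm_set_ss(3.0);
///     let c = _mm_set_ss(4.0);
///     // Mask bit 0 set: lower lane = 2.0 * 3.0 - 4.0 = 2.0
///     let r = _mm_mask3_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c, 0b1);
///     assert_eq!(_mm_cvtss_f32(r), 2.0);
///     // Mask bit 0 clear: the lower lane keeps the value from `c`
///     let s = _mm_mask3_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c, 0b0);
///     assert_eq!(_mm_cvtss_f32(s), 4.0);
/// }
/// ```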
38441#[inline]
38442#[target_feature(enable = "avx512f")]
38443#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38444#[cfg_attr(test, assert_instr(vfmsub213ss, ROUNDING = 8))]
38445#[rustc_legacy_const_generics(4)]
38446pub unsafe fn _mm_mask3_fmsub_round_ss<const ROUNDING: i32>(
38447 a: __m128,
38448 b: __m128,
38449 c: __m128,
38450 k: __mmask8,
38451) -> __m128 {
38452 static_assert_rounding!(ROUNDING);
38453 let mut fmsub: f32 = simd_extract!(c, 0);
38454 if (k & 0b00000001) != 0 {
38455 let extracta: f32 = simd_extract!(a, 0);
38456 let extractb: f32 = simd_extract!(b, 0);
38457 let extractc: f32 = -fmsub;
        fmsub = vfmadd132ss(extracta, extractb, extractc, ROUNDING);
38459 }
38460 simd_insert!(c, 0, fmsub)
38461}
38462
38463/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
38464///
38465/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38466/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
38467/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
38468/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
38469/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
38470/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
38471///
38472/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fmsub_round_sd&expand=2655)
38473#[inline]
38474#[target_feature(enable = "avx512f")]
38475#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38476#[cfg_attr(test, assert_instr(vfmsub213sd, ROUNDING = 8))]
38477#[rustc_legacy_const_generics(3)]
38478pub unsafe fn _mm_fmsub_round_sd<const ROUNDING: i32>(
38479 a: __m128d,
38480 b: __m128d,
38481 c: __m128d,
38482) -> __m128d {
38483 static_assert_rounding!(ROUNDING);
38484 let extracta: f64 = simd_extract!(a, 0);
38485 let extractb: f64 = simd_extract!(b, 0);
38486 let extractc: f64 = simd_extract!(c, 0);
38487 let extractc: f64 = -extractc;
    let fmsub: f64 = vfmadd132sd(extracta, extractb, extractc, ROUNDING);
38489 simd_insert!(a, 0, fmsub)
38490}
38491
38492/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38493///
38494/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38495/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
38496/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
38497/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
38498/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
38499/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
38500///
38501/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmsub_round_sd&expand=2656)
38502#[inline]
38503#[target_feature(enable = "avx512f")]
38504#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38505#[cfg_attr(test, assert_instr(vfmsub213sd, ROUNDING = 8))]
38506#[rustc_legacy_const_generics(4)]
38507pub unsafe fn _mm_mask_fmsub_round_sd<const ROUNDING: i32>(
38508 a: __m128d,
38509 k: __mmask8,
38510 b: __m128d,
38511 c: __m128d,
38512) -> __m128d {
38513 static_assert_rounding!(ROUNDING);
38514 let mut fmsub: f64 = simd_extract!(a, 0);
38515 if (k & 0b00000001) != 0 {
38516 let extractb: f64 = simd_extract!(b, 0);
38517 let extractc: f64 = simd_extract!(c, 0);
38518 let extractc: f64 = -extractc;
        fmsub = vfmadd132sd(fmsub, extractb, extractc, ROUNDING);
38520 }
38521 simd_insert!(a, 0, fmsub)
38522}
38523
38524/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38525///
38526/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38527/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
38528/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
38529/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
38530/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
38531/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
38532///
38533/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmsub_round_sd&expand=2658)
38534#[inline]
38535#[target_feature(enable = "avx512f")]
38536#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38537#[cfg_attr(test, assert_instr(vfmsub213sd, ROUNDING = 8))]
38538#[rustc_legacy_const_generics(4)]
38539pub unsafe fn _mm_maskz_fmsub_round_sd<const ROUNDING: i32>(
38540 k: __mmask8,
38541 a: __m128d,
38542 b: __m128d,
38543 c: __m128d,
38544) -> __m128d {
38545 static_assert_rounding!(ROUNDING);
38546 let mut fmsub: f64 = 0.;
38547 if (k & 0b00000001) != 0 {
38548 let extracta: f64 = simd_extract!(a, 0);
38549 let extractb: f64 = simd_extract!(b, 0);
38550 let extractc: f64 = simd_extract!(c, 0);
38551 let extractc: f64 = -extractc;
        fmsub = vfmadd132sd(extracta, extractb, extractc, ROUNDING);
38553 }
38554 simd_insert!(a, 0, fmsub)
38555}
38556
38557/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
38558///
38559/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38560/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
38561/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
38562/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
38563/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
38564/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
38565///
38566/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmsub_round_sd&expand=2657)
38567#[inline]
38568#[target_feature(enable = "avx512f")]
38569#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38570#[cfg_attr(test, assert_instr(vfmsub213sd, ROUNDING = 8))]
38571#[rustc_legacy_const_generics(4)]
38572pub unsafe fn _mm_mask3_fmsub_round_sd<const ROUNDING: i32>(
38573 a: __m128d,
38574 b: __m128d,
38575 c: __m128d,
38576 k: __mmask8,
38577) -> __m128d {
38578 static_assert_rounding!(ROUNDING);
38579 let mut fmsub: f64 = simd_extract!(c, 0);
38580 if (k & 0b00000001) != 0 {
38581 let extracta: f64 = simd_extract!(a, 0);
38582 let extractb: f64 = simd_extract!(b, 0);
38583 let extractc: f64 = -fmsub;
        fmsub = vfmadd132sd(extracta, extractb, extractc, ROUNDING);
38585 }
38586 simd_insert!(c, 0, fmsub)
38587}
38588
38589/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
38590///
38591/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38592/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
38593/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
38594/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
38595/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
38596/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
38597///
38598/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fnmadd_round_ss&expand=2739)
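///
/// # Examples
///
/// A minimal usage sketch (not from Intel's documentation; assumes a nightly
/// toolchain with `stdarch_x86_avx512` and an `avx512f`-capable CPU, hence `ignore`):
///
/// ```ignore
/// unsafe {
///     let a = _mm_set_ss(2.0);
///     let b = _mm_set_ss(3.0);
///     let c = _mm_set_ss(4.0);
///     // Lower lane: -(2.0 * 3.0) + 4.0 = -2.0
///     let r = _mm_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
///     assert_eq!(_mm_cvtss_f32(r), -2.0);
/// }
/// ```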
38599#[inline]
38600#[target_feature(enable = "avx512f")]
38601#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38602#[cfg_attr(test, assert_instr(vfnmadd213ss, ROUNDING = 8))]
38603#[rustc_legacy_const_generics(3)]
38604pub unsafe fn _mm_fnmadd_round_ss<const ROUNDING: i32>(a: __m128, b: __m128, c: __m128) -> __m128 {
38605 static_assert_rounding!(ROUNDING);
38606 let extracta: f32 = simd_extract!(a, 0);
38607 let extracta: f32 = -extracta;
38608 let extractb: f32 = simd_extract!(b, 0);
38609 let extractc: f32 = simd_extract!(c, 0);
    let fnmadd: f32 = vfmadd132ss(extracta, extractb, extractc, ROUNDING);
38611 simd_insert!(a, 0, fnmadd)
38612}
38613
38614/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38615///
38616/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38617/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
38618/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
38619/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
38620/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
38621/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
38622///
38623/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmadd_round_ss&expand=2740)
38624#[inline]
38625#[target_feature(enable = "avx512f")]
38626#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38627#[cfg_attr(test, assert_instr(vfnmadd213ss, ROUNDING = 8))]
38628#[rustc_legacy_const_generics(4)]
38629pub unsafe fn _mm_mask_fnmadd_round_ss<const ROUNDING: i32>(
38630 a: __m128,
38631 k: __mmask8,
38632 b: __m128,
38633 c: __m128,
38634) -> __m128 {
38635 static_assert_rounding!(ROUNDING);
38636 let mut fnmadd: f32 = simd_extract!(a, 0);
38637 if (k & 0b00000001) != 0 {
38638 let extracta: f32 = -fnmadd;
38639 let extractb: f32 = simd_extract!(b, 0);
38640 let extractc: f32 = simd_extract!(c, 0);
        fnmadd = vfmadd132ss(extracta, extractb, extractc, ROUNDING);
38642 }
38643 simd_insert!(a, 0, fnmadd)
38644}
38645
38646/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38647///
38648/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38649/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
38650/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
38651/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
38652/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
38653/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
38654///
38655/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmadd_round_ss&expand=2742)
38656#[inline]
38657#[target_feature(enable = "avx512f")]
38658#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38659#[cfg_attr(test, assert_instr(vfnmadd213ss, ROUNDING = 8))]
38660#[rustc_legacy_const_generics(4)]
38661pub unsafe fn _mm_maskz_fnmadd_round_ss<const ROUNDING: i32>(
38662 k: __mmask8,
38663 a: __m128,
38664 b: __m128,
38665 c: __m128,
38666) -> __m128 {
38667 static_assert_rounding!(ROUNDING);
38668 let mut fnmadd: f32 = 0.;
38669 if (k & 0b00000001) != 0 {
38670 let extracta: f32 = simd_extract!(a, 0);
38671 let extracta: f32 = -extracta;
38672 let extractb: f32 = simd_extract!(b, 0);
38673 let extractc: f32 = simd_extract!(c, 0);
        fnmadd = vfmadd132ss(extracta, extractb, extractc, ROUNDING);
38675 }
38676 simd_insert!(a, 0, fnmadd)
38677}
38678
38679/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
38680///
38681/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38682/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
38683/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
38684/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
38685/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
38686/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
38687///
38688/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmadd_round_ss&expand=2741)
38689#[inline]
38690#[target_feature(enable = "avx512f")]
38691#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38692#[cfg_attr(test, assert_instr(vfnmadd213ss, ROUNDING = 8))]
38693#[rustc_legacy_const_generics(4)]
38694pub unsafe fn _mm_mask3_fnmadd_round_ss<const ROUNDING: i32>(
38695 a: __m128,
38696 b: __m128,
38697 c: __m128,
38698 k: __mmask8,
38699) -> __m128 {
38700 static_assert_rounding!(ROUNDING);
38701 let mut fnmadd: f32 = simd_extract!(c, 0);
38702 if (k & 0b00000001) != 0 {
38703 let extracta: f32 = simd_extract!(a, 0);
38704 let extracta: f32 = -extracta;
38705 let extractb: f32 = simd_extract!(b, 0);
        fnmadd = vfmadd132ss(extracta, extractb, fnmadd, ROUNDING);
38707 }
38708 simd_insert!(c, 0, fnmadd)
38709}
38710
38711/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
38712///
38713/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38714/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
38715/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
38716/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
38717/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
38718/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
38719///
38720/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fnmadd_round_sd&expand=2735)
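///
/// # Examples
///
/// An illustrative sketch that also checks the upper lane, using the SSE2 helpers
/// `_mm_set_pd`, `_mm_unpackhi_pd` and `_mm_cvtsd_f64` (assumes a nightly toolchain
/// with `stdarch_x86_avx512` and an `avx512f`-capable CPU, hence `ignore`):
///
/// ```ignore
/// unsafe {
///     let a = _mm_set_pd(7.0, 2.0); // upper = 7.0, lower = 2.0
///     let b = _mm_set_sd(3.0);
///     let c = _mm_set_sd(4.0);
///     // Lower lane: -(2.0 * 3.0) + 4.0 = -2.0; the upper lane is copied from `a`.
///     let r = _mm_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
///     assert_eq!(_mm_cvtsd_f64(r), -2.0);
///     assert_eq!(_mm_cvtsd_f64(_mm_unpackhi_pd(r, r)), 7.0);
/// }
/// ```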
38721#[inline]
38722#[target_feature(enable = "avx512f")]
38723#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38724#[cfg_attr(test, assert_instr(vfnmadd213sd, ROUNDING = 8))]
38725#[rustc_legacy_const_generics(3)]
38726pub unsafe fn _mm_fnmadd_round_sd<const ROUNDING: i32>(
38727 a: __m128d,
38728 b: __m128d,
38729 c: __m128d,
38730) -> __m128d {
38731 static_assert_rounding!(ROUNDING);
38732 let extracta: f64 = simd_extract!(a, 0);
38733 let extracta: f64 = -extracta;
38734 let extractb: f64 = simd_extract!(b, 0);
38735 let extractc: f64 = simd_extract!(c, 0);
    let fnmadd: f64 = vfmadd132sd(extracta, extractb, extractc, ROUNDING);
38737 simd_insert!(a, 0, fnmadd)
38738}
38739
38740/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38741///
38742/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38743/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
38744/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
38745/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
38746/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
38747/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
38748///
38749/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmadd_round_sd&expand=2736)
38750#[inline]
38751#[target_feature(enable = "avx512f")]
38752#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38753#[cfg_attr(test, assert_instr(vfnmadd213sd, ROUNDING = 8))]
38754#[rustc_legacy_const_generics(4)]
38755pub unsafe fn _mm_mask_fnmadd_round_sd<const ROUNDING: i32>(
38756 a: __m128d,
38757 k: __mmask8,
38758 b: __m128d,
38759 c: __m128d,
38760) -> __m128d {
38761 static_assert_rounding!(ROUNDING);
38762 let mut fnmadd: f64 = simd_extract!(a, 0);
38763 if (k & 0b00000001) != 0 {
38764 let extracta: f64 = -fnmadd;
38765 let extractb: f64 = simd_extract!(b, 0);
38766 let extractc: f64 = simd_extract!(c, 0);
        fnmadd = vfmadd132sd(extracta, extractb, extractc, ROUNDING);
38768 }
38769 simd_insert!(a, 0, fnmadd)
38770}
38771
38772/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38773///
38774/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38775/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
38776/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
38777/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
38778/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
38779/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
38780///
38781/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmadd_round_sd&expand=2738)
38782#[inline]
38783#[target_feature(enable = "avx512f")]
38784#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38785#[cfg_attr(test, assert_instr(vfnmadd213sd, ROUNDING = 8))]
38786#[rustc_legacy_const_generics(4)]
38787pub unsafe fn _mm_maskz_fnmadd_round_sd<const ROUNDING: i32>(
38788 k: __mmask8,
38789 a: __m128d,
38790 b: __m128d,
38791 c: __m128d,
38792) -> __m128d {
38793 static_assert_rounding!(ROUNDING);
38794 let mut fnmadd: f64 = 0.;
38795 if (k & 0b00000001) != 0 {
38796 let extracta: f64 = simd_extract!(a, 0);
38797 let extracta: f64 = -extracta;
38798 let extractb: f64 = simd_extract!(b, 0);
38799 let extractc: f64 = simd_extract!(c, 0);
        fnmadd = vfmadd132sd(extracta, extractb, extractc, ROUNDING);
38801 }
38802 simd_insert!(a, 0, fnmadd)
38803}
38804
38805/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
38806///
38807/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38808/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
38809/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
38810/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
38811/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
38812/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
38813///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmadd_round_sd&expand=2737)
38815#[inline]
38816#[target_feature(enable = "avx512f")]
38817#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38818#[cfg_attr(test, assert_instr(vfnmadd213sd, ROUNDING = 8))]
38819#[rustc_legacy_const_generics(4)]
38820pub unsafe fn _mm_mask3_fnmadd_round_sd<const ROUNDING: i32>(
38821 a: __m128d,
38822 b: __m128d,
38823 c: __m128d,
38824 k: __mmask8,
38825) -> __m128d {
38826 static_assert_rounding!(ROUNDING);
38827 let mut fnmadd: f64 = simd_extract!(c, 0);
38828 if (k & 0b00000001) != 0 {
38829 let extracta: f64 = simd_extract!(a, 0);
38830 let extracta: f64 = -extracta;
38831 let extractb: f64 = simd_extract!(b, 0);
        fnmadd = vfmadd132sd(extracta, extractb, fnmadd, ROUNDING);
38833 }
38834 simd_insert!(c, 0, fnmadd)
38835}
38836
/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
38838///
38839/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38840/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
38841/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
38842/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
38843/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
38844/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
38845///
38846/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fnmsub_round_ss&expand=2787)
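///
/// # Examples
///
/// A minimal usage sketch (not from Intel's documentation; assumes a nightly
/// toolchain with `stdarch_x86_avx512` and an `avx512f`-capable CPU, hence `ignore`):
///
/// ```ignore
/// unsafe {
///     let a = _mm_set_ss(2.0);
///     let b = _mm_set_ss(3.0);
///     let c = _mm_set_ss(4.0);
///     // Lower lane: -(2.0 * 3.0) - 4.0 = -10.0
///     let r = _mm_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
///     assert_eq!(_mm_cvtss_f32(r), -10.0);
/// }
/// ```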
38847#[inline]
38848#[target_feature(enable = "avx512f")]
38849#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38850#[cfg_attr(test, assert_instr(vfnmsub213ss, ROUNDING = 8))]
38851#[rustc_legacy_const_generics(3)]
38852pub unsafe fn _mm_fnmsub_round_ss<const ROUNDING: i32>(a: __m128, b: __m128, c: __m128) -> __m128 {
38853 static_assert_rounding!(ROUNDING);
38854 let extracta: f32 = simd_extract!(a, 0);
38855 let extracta: f32 = -extracta;
38856 let extractb: f32 = simd_extract!(b, 0);
38857 let extractc: f32 = simd_extract!(c, 0);
38858 let extractc: f32 = -extractc;
    let fnmsub: f32 = vfmadd132ss(extracta, extractb, extractc, ROUNDING);
38860 simd_insert!(a, 0, fnmsub)
38861}
38862
/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38864///
38865/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38866/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
38867/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
38868/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
38869/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
38870/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
38871///
38872/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmsub_round_ss&expand=2788)
38873#[inline]
38874#[target_feature(enable = "avx512f")]
38875#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38876#[cfg_attr(test, assert_instr(vfnmsub213ss, ROUNDING = 8))]
38877#[rustc_legacy_const_generics(4)]
38878pub unsafe fn _mm_mask_fnmsub_round_ss<const ROUNDING: i32>(
38879 a: __m128,
38880 k: __mmask8,
38881 b: __m128,
38882 c: __m128,
38883) -> __m128 {
38884 static_assert_rounding!(ROUNDING);
38885 let mut fnmsub: f32 = simd_extract!(a, 0);
38886 if (k & 0b00000001) != 0 {
38887 let extracta: f32 = -fnmsub;
38888 let extractb: f32 = simd_extract!(b, 0);
38889 let extractc: f32 = simd_extract!(c, 0);
38890 let extractc: f32 = -extractc;
        fnmsub = vfmadd132ss(extracta, extractb, extractc, ROUNDING);
38892 }
38893 simd_insert!(a, 0, fnmsub)
38894}
38895
38896/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38897///
38898/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38899/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
38900/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
38901/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
38902/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
38903/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
38904///
38905/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmsub_round_ss&expand=2790)
38906#[inline]
38907#[target_feature(enable = "avx512f")]
38908#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38909#[cfg_attr(test, assert_instr(vfnmsub213ss, ROUNDING = 8))]
38910#[rustc_legacy_const_generics(4)]
38911pub unsafe fn _mm_maskz_fnmsub_round_ss<const ROUNDING: i32>(
38912 k: __mmask8,
38913 a: __m128,
38914 b: __m128,
38915 c: __m128,
38916) -> __m128 {
38917 static_assert_rounding!(ROUNDING);
38918 let mut fnmsub: f32 = 0.;
38919 if (k & 0b00000001) != 0 {
38920 let extracta: f32 = simd_extract!(a, 0);
38921 let extracta: f32 = -extracta;
38922 let extractb: f32 = simd_extract!(b, 0);
38923 let extractc: f32 = simd_extract!(c, 0);
38924 let extractc: f32 = -extractc;
        fnmsub = vfmadd132ss(extracta, extractb, extractc, ROUNDING);
38926 }
38927 simd_insert!(a, 0, fnmsub)
38928}
38929
/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
38931///
38932/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38933/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
38934/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
38935/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
38936/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
38937/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
38938///
38939/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmsub_round_ss&expand=2789)
38940#[inline]
38941#[target_feature(enable = "avx512f")]
38942#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38943#[cfg_attr(test, assert_instr(vfnmsub213ss, ROUNDING = 8))]
38944#[rustc_legacy_const_generics(4)]
38945pub unsafe fn _mm_mask3_fnmsub_round_ss<const ROUNDING: i32>(
38946 a: __m128,
38947 b: __m128,
38948 c: __m128,
38949 k: __mmask8,
38950) -> __m128 {
38951 static_assert_rounding!(ROUNDING);
38952 let mut fnmsub: f32 = simd_extract!(c, 0);
38953 if (k & 0b00000001) != 0 {
38954 let extracta: f32 = simd_extract!(a, 0);
38955 let extracta: f32 = -extracta;
38956 let extractb: f32 = simd_extract!(b, 0);
38957 let extractc: f32 = -fnmsub;
        fnmsub = vfmadd132ss(extracta, extractb, extractc, ROUNDING);
38959 }
38960 simd_insert!(c, 0, fnmsub)
38961}
38962
38963/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
38964///
38965/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38966/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
38967/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
38968/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
38969/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
38970/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
38971///
38972/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fnmsub_round_sd&expand=2783)
38973#[inline]
38974#[target_feature(enable = "avx512f")]
38975#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38976#[cfg_attr(test, assert_instr(vfnmsub213sd, ROUNDING = 8))]
38977#[rustc_legacy_const_generics(3)]
38978pub unsafe fn _mm_fnmsub_round_sd<const ROUNDING: i32>(
38979 a: __m128d,
38980 b: __m128d,
38981 c: __m128d,
38982) -> __m128d {
38983 static_assert_rounding!(ROUNDING);
38984 let extracta: f64 = simd_extract!(a, 0);
38985 let extracta: f64 = -extracta;
38986 let extractb: f64 = simd_extract!(b, 0);
38987 let extractc: f64 = simd_extract!(c, 0);
38988 let extractc: f64 = -extractc;
    let fnmsub: f64 = vfmadd132sd(extracta, extractb, extractc, ROUNDING);
38990 simd_insert!(a, 0, fnmsub)
38991}
38992
/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38994///
38995/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38996/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
38997/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
38998/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
38999/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
39000/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
39001///
39002/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmsub_round_sd&expand=2784)
39003#[inline]
39004#[target_feature(enable = "avx512f")]
39005#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39006#[cfg_attr(test, assert_instr(vfnmsub213sd, ROUNDING = 8))]
39007#[rustc_legacy_const_generics(4)]
39008pub unsafe fn _mm_mask_fnmsub_round_sd<const ROUNDING: i32>(
39009 a: __m128d,
39010 k: __mmask8,
39011 b: __m128d,
39012 c: __m128d,
39013) -> __m128d {
39014 static_assert_rounding!(ROUNDING);
39015 let mut fnmsub: f64 = simd_extract!(a, 0);
39016 if (k & 0b00000001) != 0 {
39017 let extracta: f64 = -fnmsub;
39018 let extractb: f64 = simd_extract!(b, 0);
39019 let extractc: f64 = simd_extract!(c, 0);
39020 let extractc: f64 = -extractc;
        fnmsub = vfmadd132sd(extracta, extractb, extractc, ROUNDING);
39022 }
39023 simd_insert!(a, 0, fnmsub)
39024}
39025
/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39027///
39028/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39029/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
39030/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
39031/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
39032/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
39033/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
39034///
39035/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmsub_round_sd&expand=2786)
39036#[inline]
39037#[target_feature(enable = "avx512f")]
39038#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39039#[cfg_attr(test, assert_instr(vfnmsub213sd, ROUNDING = 8))]
39040#[rustc_legacy_const_generics(4)]
39041pub unsafe fn _mm_maskz_fnmsub_round_sd<const ROUNDING: i32>(
39042 k: __mmask8,
39043 a: __m128d,
39044 b: __m128d,
39045 c: __m128d,
39046) -> __m128d {
39047 static_assert_rounding!(ROUNDING);
39048 let mut fnmsub: f64 = 0.;
39049 if (k & 0b00000001) != 0 {
39050 let extracta: f64 = simd_extract!(a, 0);
39051 let extracta: f64 = -extracta;
39052 let extractb: f64 = simd_extract!(b, 0);
39053 let extractc: f64 = simd_extract!(c, 0);
39054 let extractc: f64 = -extractc;
        fnmsub = vfmadd132sd(extracta, extractb, extractc, ROUNDING);
39056 }
39057 simd_insert!(a, 0, fnmsub)
39058}
39059
39060/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
39061///
39062/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39063/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
39064/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
39065/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
39066/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
39067/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
39068///
39069/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmsub_round_sd&expand=2785)
39070#[inline]
39071#[target_feature(enable = "avx512f")]
39072#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39073#[cfg_attr(test, assert_instr(vfnmsub213sd, ROUNDING = 8))]
39074#[rustc_legacy_const_generics(4)]
39075pub unsafe fn _mm_mask3_fnmsub_round_sd<const ROUNDING: i32>(
39076 a: __m128d,
39077 b: __m128d,
39078 c: __m128d,
39079 k: __mmask8,
39080) -> __m128d {
39081 static_assert_rounding!(ROUNDING);
39082 let mut fnmsub: f64 = simd_extract!(c, 0);
39083 if (k & 0b00000001) != 0 {
39084 let extracta: f64 = simd_extract!(a, 0);
39085 let extracta: f64 = -extracta;
39086 let extractb: f64 = simd_extract!(b, 0);
39087 let extractc: f64 = -fnmsub;
        fnmsub = vfmadd132sd(extracta, extractb, extractc, ROUNDING);
39089 }
39090 simd_insert!(c, 0, fnmsub)
39091}
39092
39093/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.
39094///
39095/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_ss&expand=2517)
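///
/// # Examples
///
/// An illustrative sketch (not from Intel's documentation; assumes a nightly
/// toolchain with `stdarch_x86_avx512` and an `avx512f`-capable CPU, hence
/// `ignore`). With an all-zero table in `c`, every token response is 0, which
/// per Intel's FIXUPIMM definition selects the element from `a`, so the fix-up
/// leaves `a` unchanged here:
///
/// ```ignore
/// unsafe {
///     let a = _mm_set_ss(1.0);
///     let b = _mm_set_ss(-5.0);
///     let c = _mm_setzero_si128(); // identity fix-up table
///     let r = _mm_fixupimm_ss::<0>(a, b, c);
///     assert_eq!(_mm_cvtss_f32(r), 1.0);
/// }
/// ```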
39096#[inline]
39097#[target_feature(enable = "avx512f")]
39098#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39099#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0))]
39100#[rustc_legacy_const_generics(3)]
39101pub unsafe fn _mm_fixupimm_ss<const IMM8: i32>(a: __m128, b: __m128, c: __m128i) -> __m128 {
39102 static_assert_uimm_bits!(IMM8, 8);
39103 let a: f32x4 = a.as_f32x4();
39104 let b: f32x4 = b.as_f32x4();
39105 let c: i32x4 = c.as_i32x4();
    let r: f32x4 = vfixupimmss(a, b, c, IMM8, 0b11111111, _MM_FROUND_CUR_DIRECTION);
    let fixupimm: f32 = simd_extract!(r, 0);
    let r: f32x4 = simd_insert!(a, 0, fixupimm);
    transmute(r)
39110}
39111
39112/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.
39113///
39114/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_ss&expand=2518)
39115#[inline]
39116#[target_feature(enable = "avx512f")]
39117#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39118#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0))]
39119#[rustc_legacy_const_generics(4)]
39120pub unsafe fn _mm_mask_fixupimm_ss<const IMM8: i32>(
39121 a: __m128,
39122 k: __mmask8,
39123 b: __m128,
39124 c: __m128i,
39125) -> __m128 {
39126 static_assert_uimm_bits!(IMM8, 8);
39127 let a: f32x4 = a.as_f32x4();
39128 let b: f32x4 = b.as_f32x4();
39129 let c: i32x4 = c.as_i32x4();
    let fixupimm: f32x4 = vfixupimmss(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
    let fixupimm: f32 = simd_extract!(fixupimm, 0);
    let r: f32x4 = simd_insert!(a, 0, fixupimm);
    transmute(r)
39134}
39135
39136/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.
39137///
39138/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_ss&expand=2519)
39139#[inline]
39140#[target_feature(enable = "avx512f")]
39141#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39142#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0))]
39143#[rustc_legacy_const_generics(4)]
39144pub unsafe fn _mm_maskz_fixupimm_ss<const IMM8: i32>(
39145 k: __mmask8,
39146 a: __m128,
39147 b: __m128,
39148 c: __m128i,
39149) -> __m128 {
39150 static_assert_uimm_bits!(IMM8, 8);
39151 let a: f32x4 = a.as_f32x4();
39152 let b: f32x4 = b.as_f32x4();
39153 let c: i32x4 = c.as_i32x4();
    let fixupimm: f32x4 = vfixupimmssz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
    let fixupimm: f32 = simd_extract!(fixupimm, 0);
    let r: f32x4 = simd_insert!(a, 0, fixupimm);
    transmute(r)
39158}
39159
39160/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.
39161///
39162/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_sd&expand=2514)
39163#[inline]
39164#[target_feature(enable = "avx512f")]
39165#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39166#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0))]
39167#[rustc_legacy_const_generics(3)]
39168pub unsafe fn _mm_fixupimm_sd<const IMM8: i32>(a: __m128d, b: __m128d, c: __m128i) -> __m128d {
39169 static_assert_uimm_bits!(IMM8, 8);
39170 let a: f64x2 = a.as_f64x2();
39171 let b: f64x2 = b.as_f64x2();
39172 let c: i64x2 = c.as_i64x2();
    let fixupimm: f64x2 = vfixupimmsd(a, b, c, IMM8, 0b11111111, _MM_FROUND_CUR_DIRECTION);
    let fixupimm: f64 = simd_extract!(fixupimm, 0);
    let r: f64x2 = simd_insert!(a, 0, fixupimm);
    transmute(r)
39177}
39178
39179/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.
39180///
39181/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_sd&expand=2515)
39182#[inline]
39183#[target_feature(enable = "avx512f")]
39184#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39185#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0))]
39186#[rustc_legacy_const_generics(4)]
39187pub unsafe fn _mm_mask_fixupimm_sd<const IMM8: i32>(
39188 a: __m128d,
39189 k: __mmask8,
39190 b: __m128d,
39191 c: __m128i,
39192) -> __m128d {
39193 static_assert_uimm_bits!(IMM8, 8);
39194 let a: f64x2 = a.as_f64x2();
39195 let b: f64x2 = b.as_f64x2();
39196 let c: i64x2 = c.as_i64x2();
    let fixupimm: f64x2 = vfixupimmsd(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
    let fixupimm: f64 = simd_extract!(fixupimm, 0);
    let r: f64x2 = simd_insert!(a, 0, fixupimm);
    transmute(r)
39201}
39202
39203/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.
39204///
39205/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_sd&expand=2516)
39206#[inline]
39207#[target_feature(enable = "avx512f")]
39208#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39209#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0))]
39210#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm_maskz_fixupimm_sd<const IMM8: i32>(
    k: __mmask8,
    a: __m128d,
    b: __m128d,
    c: __m128i,
) -> __m128d {
    static_assert_uimm_bits!(IMM8, 8);
    let a: f64x2 = a.as_f64x2();
    let b: f64x2 = b.as_f64x2();
    let c: i64x2 = c.as_i64x2();
    let fixupimm: f64x2 = vfixupimmsdz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
    let fixupimm: f64 = simd_extract!(fixupimm, 0);
    let r: f64x2 = simd_insert!(a, 0, fixupimm);
    transmute(r)
}
39226
39227/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.\
39228/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39229///
39230/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_round_ss&expand=2511)
39231#[inline]
39232#[target_feature(enable = "avx512f")]
39233#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39234#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0, SAE = 8))]
39235#[rustc_legacy_const_generics(3, 4)]
pub unsafe fn _mm_fixupimm_round_ss<const IMM8: i32, const SAE: i32>(
    a: __m128,
    b: __m128,
    c: __m128i,
) -> __m128 {
    static_assert_uimm_bits!(IMM8, 8);
    static_assert_mantissas_sae!(SAE);
    let a: f32x4 = a.as_f32x4();
    let b: f32x4 = b.as_f32x4();
    let c: i32x4 = c.as_i32x4();
    let r: f32x4 = vfixupimmss(a, b, c, IMM8, 0b11111111, SAE);
    let fixupimm: f32 = simd_extract!(r, 0);
    let r: f32x4 = simd_insert!(a, 0, fixupimm);
    transmute(r)
}
39251
39252/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.\
39253/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39254///
39255/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_round_ss&expand=2512)
39256#[inline]
39257#[target_feature(enable = "avx512f")]
39258#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39259#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0, SAE = 8))]
39260#[rustc_legacy_const_generics(4, 5)]
pub unsafe fn _mm_mask_fixupimm_round_ss<const IMM8: i32, const SAE: i32>(
    a: __m128,
    k: __mmask8,
    b: __m128,
    c: __m128i,
) -> __m128 {
    static_assert_uimm_bits!(IMM8, 8);
    static_assert_mantissas_sae!(SAE);
    let a: f32x4 = a.as_f32x4();
    let b: f32x4 = b.as_f32x4();
    let c: i32x4 = c.as_i32x4();
    let r: f32x4 = vfixupimmss(a, b, c, IMM8, k, SAE);
    let fixupimm: f32 = simd_extract!(r, 0);
    let r: f32x4 = simd_insert!(a, 0, fixupimm);
    transmute(r)
}
39277
39278/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.\
39279/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39280///
39281/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_round_ss&expand=2513)
39282#[inline]
39283#[target_feature(enable = "avx512f")]
39284#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39285#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0, SAE = 8))]
39286#[rustc_legacy_const_generics(4, 5)]
pub unsafe fn _mm_maskz_fixupimm_round_ss<const IMM8: i32, const SAE: i32>(
    k: __mmask8,
    a: __m128,
    b: __m128,
    c: __m128i,
) -> __m128 {
    static_assert_uimm_bits!(IMM8, 8);
    static_assert_mantissas_sae!(SAE);
    let a: f32x4 = a.as_f32x4();
    let b: f32x4 = b.as_f32x4();
    let c: i32x4 = c.as_i32x4();
    let r: f32x4 = vfixupimmssz(a, b, c, IMM8, k, SAE);
    let fixupimm: f32 = simd_extract!(r, 0);
    let r: f32x4 = simd_insert!(a, 0, fixupimm);
    transmute(r)
}
39303
39304/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.\
39305/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39306///
39307/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_round_sd&expand=2508)
39308#[inline]
39309#[target_feature(enable = "avx512f")]
39310#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39311#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0, SAE = 8))]
39312#[rustc_legacy_const_generics(3, 4)]
pub unsafe fn _mm_fixupimm_round_sd<const IMM8: i32, const SAE: i32>(
    a: __m128d,
    b: __m128d,
    c: __m128i,
) -> __m128d {
    static_assert_uimm_bits!(IMM8, 8);
    static_assert_mantissas_sae!(SAE);
    let a: f64x2 = a.as_f64x2();
    let b: f64x2 = b.as_f64x2();
    let c: i64x2 = c.as_i64x2();
    let r: f64x2 = vfixupimmsd(a, b, c, IMM8, 0b11111111, SAE);
    let fixupimm: f64 = simd_extract!(r, 0);
    let r: f64x2 = simd_insert!(a, 0, fixupimm);
    transmute(r)
}
39328
39329/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.\
39330/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39331///
39332/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_round_sd&expand=2509)
39333#[inline]
39334#[target_feature(enable = "avx512f")]
39335#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39336#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0, SAE = 8))]
39337#[rustc_legacy_const_generics(4, 5)]
pub unsafe fn _mm_mask_fixupimm_round_sd<const IMM8: i32, const SAE: i32>(
    a: __m128d,
    k: __mmask8,
    b: __m128d,
    c: __m128i,
) -> __m128d {
    static_assert_uimm_bits!(IMM8, 8);
    static_assert_mantissas_sae!(SAE);
    let a: f64x2 = a.as_f64x2();
    let b: f64x2 = b.as_f64x2();
    let c: i64x2 = c.as_i64x2();
    let r: f64x2 = vfixupimmsd(a, b, c, IMM8, k, SAE);
    let fixupimm: f64 = simd_extract!(r, 0);
    let r: f64x2 = simd_insert!(a, 0, fixupimm);
    transmute(r)
}
39354
39355/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.\
39356/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39357///
39358/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_round_sd&expand=2510)
39359#[inline]
39360#[target_feature(enable = "avx512f")]
39361#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39362#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0, SAE = 8))]
39363#[rustc_legacy_const_generics(4, 5)]
pub unsafe fn _mm_maskz_fixupimm_round_sd<const IMM8: i32, const SAE: i32>(
    k: __mmask8,
    a: __m128d,
    b: __m128d,
    c: __m128i,
) -> __m128d {
    static_assert_uimm_bits!(IMM8, 8);
    static_assert_mantissas_sae!(SAE);
    let a: f64x2 = a.as_f64x2();
    let b: f64x2 = b.as_f64x2();
    let c: i64x2 = c.as_i64x2();
    let r: f64x2 = vfixupimmsdz(a, b, c, IMM8, k, SAE);
    let fixupimm: f64 = simd_extract!(r, 0);
    let r: f64x2 = simd_insert!(a, 0, fixupimm);
    transmute(r)
}
39380
39381/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
39382///
39383/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_cvtss_sd&expand=1896)
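///
/// # Example
///
/// A minimal illustrative sketch (not part of the original documentation), assuming
/// `avx512f` is available at runtime:
///
/// ```ignore
/// if is_x86_feature_detected!("avx512f") {
///     unsafe {
///         let src = _mm_set_pd(9.0, 9.0);
///         let a = _mm_set_pd(2.0, 1.0); // upper element 2.0
///         let b = _mm_set_ss(3.0);      // lower element 3.0
///         // Mask bit 0 is set, so the lower lane becomes 3.0 widened to f64;
///         // the upper lane is copied from `a`.
///         let r = _mm_mask_cvtss_sd(src, 0b1, a, b);
///         assert_eq!(_mm_cvtsd_f64(r), 3.0);
///     }
/// }
/// ```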
39384#[inline]
39385#[target_feature(enable = "avx512f")]
39386#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39387#[cfg_attr(test, assert_instr(vcvtss2sd))]
pub unsafe fn _mm_mask_cvtss_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128) -> __m128d {
    transmute(vcvtss2sd(
        a.as_f64x2(),
        b.as_f32x4(),
        src.as_f64x2(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
}
39397
39398/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
39399///
39400/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_cvtss_sd&expand=1897)
39401#[inline]
39402#[target_feature(enable = "avx512f")]
39403#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39404#[cfg_attr(test, assert_instr(vcvtss2sd))]
pub unsafe fn _mm_maskz_cvtss_sd(k: __mmask8, a: __m128d, b: __m128) -> __m128d {
    transmute(vcvtss2sd(
        a.as_f64x2(),
        b.as_f32x4(),
        _mm_setzero_pd().as_f64x2(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
}
39414
39415/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
39416///
39417/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_cvtsd_ss&expand=1797)
39418#[inline]
39419#[target_feature(enable = "avx512f")]
39420#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39421#[cfg_attr(test, assert_instr(vcvtsd2ss))]
pub unsafe fn _mm_mask_cvtsd_ss(src: __m128, k: __mmask8, a: __m128, b: __m128d) -> __m128 {
    transmute(vcvtsd2ss(
        a.as_f32x4(),
        b.as_f64x2(),
        src.as_f32x4(),
        k,
        _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC,
    ))
}
39431
39432/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
39433///
39434/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_cvtsd_ss&expand=1798)
39435#[inline]
39436#[target_feature(enable = "avx512f")]
39437#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39438#[cfg_attr(test, assert_instr(vcvtsd2ss))]
pub unsafe fn _mm_maskz_cvtsd_ss(k: __mmask8, a: __m128, b: __m128d) -> __m128 {
    transmute(vcvtsd2ss(
        a.as_f32x4(),
        b.as_f64x2(),
        _mm_setzero_ps().as_f32x4(),
        k,
        _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC,
    ))
}
39448
39449/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
39450/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39451///
39452/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundss_sd&expand=1371)
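///
/// # Example
///
/// A minimal illustrative sketch (not part of the original documentation); the
/// ss-to-sd widening is exact, so the `sae` parameter only controls exception reporting:
///
/// ```ignore
/// let a = _mm_setzero_pd();
/// let b = _mm_set_ss(1.5);
/// let r = unsafe { _mm_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(a, b) };
/// assert_eq!(unsafe { _mm_cvtsd_f64(r) }, 1.5);
/// ```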
39453#[inline]
39454#[target_feature(enable = "avx512f")]
39455#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39456#[cfg_attr(test, assert_instr(vcvtss2sd, SAE = 8))]
39457#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm_cvt_roundss_sd<const SAE: i32>(a: __m128d, b: __m128) -> __m128d {
    static_assert_sae!(SAE);
    let a: f64x2 = a.as_f64x2();
    let b: f32x4 = b.as_f32x4();
    let zero: f64x2 = _mm_setzero_pd().as_f64x2();
    let r: f64x2 = vcvtss2sd(a, b, zero, 0b11111111, SAE);
    transmute(r)
}
39466
39467/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39468/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39469///
39470/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_cvt_roundss_sd&expand=1372)
39471#[inline]
39472#[target_feature(enable = "avx512f")]
39473#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39474#[cfg_attr(test, assert_instr(vcvtss2sd, SAE = 8))]
39475#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm_mask_cvt_roundss_sd<const SAE: i32>(
    src: __m128d,
    k: __mmask8,
    a: __m128d,
    b: __m128,
) -> __m128d {
    static_assert_sae!(SAE);
    let a: f64x2 = a.as_f64x2();
    let b: f32x4 = b.as_f32x4();
    let src: f64x2 = src.as_f64x2();
    let r: f64x2 = vcvtss2sd(a, b, src, k, SAE);
    transmute(r)
}
39489
39490/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39491/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39492///
39493/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_cvt_roundss_sd&expand=1373)
39494#[inline]
39495#[target_feature(enable = "avx512f")]
39496#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39497#[cfg_attr(test, assert_instr(vcvtss2sd, SAE = 8))]
39498#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm_maskz_cvt_roundss_sd<const SAE: i32>(
    k: __mmask8,
    a: __m128d,
    b: __m128,
) -> __m128d {
    static_assert_sae!(SAE);
    let a: f64x2 = a.as_f64x2();
    let b: f32x4 = b.as_f32x4();
    let zero: f64x2 = _mm_setzero_pd().as_f64x2();
    let r: f64x2 = vcvtss2sd(a, b, zero, k, SAE);
    transmute(r)
}
39511
39512/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
39513/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39514/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
39515/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
39516/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
39517/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
39518/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
39519///
39520/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundsd_ss&expand=1361)
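///
/// # Example
///
/// A minimal illustrative sketch (not part of the original documentation) showing the
/// effect of an explicit rounding mode when the f64 value is not representable as f32:
///
/// ```ignore
/// let a = _mm_setzero_ps();
/// let b = _mm_set_sd(16_777_217.0); // 2^24 + 1, not representable in f32
/// let r = unsafe {
///     _mm_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b)
/// };
/// assert_eq!(unsafe { _mm_cvtss_f32(r) }, 16_777_216.0);
/// ```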
39521#[inline]
39522#[target_feature(enable = "avx512f")]
39523#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39524#[cfg_attr(test, assert_instr(vcvtsd2ss, ROUNDING = 8))]
39525#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm_cvt_roundsd_ss<const ROUNDING: i32>(a: __m128, b: __m128d) -> __m128 {
    static_assert_rounding!(ROUNDING);
    let a: f32x4 = a.as_f32x4();
    let b: f64x2 = b.as_f64x2();
    let zero: f32x4 = _mm_setzero_ps().as_f32x4();
    let r: f32x4 = vcvtsd2ss(a, b, zero, 0b11111111, ROUNDING);
    transmute(r)
}
39534
39535/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39536/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39537/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
39538/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
39539/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
39540/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
39541/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
39542///
39543/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_cvt_roundsd_ss&expand=1362)
39544#[inline]
39545#[target_feature(enable = "avx512f")]
39546#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39547#[cfg_attr(test, assert_instr(vcvtsd2ss, ROUNDING = 8))]
39548#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm_mask_cvt_roundsd_ss<const ROUNDING: i32>(
    src: __m128,
    k: __mmask8,
    a: __m128,
    b: __m128d,
) -> __m128 {
    static_assert_rounding!(ROUNDING);
    let a: f32x4 = a.as_f32x4();
    let b: f64x2 = b.as_f64x2();
    let src: f32x4 = src.as_f32x4();
    let r: f32x4 = vcvtsd2ss(a, b, src, k, ROUNDING);
    transmute(r)
}
39562
39563/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39564/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39565/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
39566/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
39567/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
39568/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
39569/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
39570///
39571/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_cvt_roundsd_ss&expand=1363)
39572#[inline]
39573#[target_feature(enable = "avx512f")]
39574#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39575#[cfg_attr(test, assert_instr(vcvtsd2ss, ROUNDING = 8))]
39576#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm_maskz_cvt_roundsd_ss<const ROUNDING: i32>(
    k: __mmask8,
    a: __m128,
    b: __m128d,
) -> __m128 {
    static_assert_rounding!(ROUNDING);
    let a: f32x4 = a.as_f32x4();
    let b: f64x2 = b.as_f64x2();
    let zero: f32x4 = _mm_setzero_ps().as_f32x4();
    let r: f32x4 = vcvtsd2ss(a, b, zero, k, ROUNDING);
    transmute(r)
}
39589
39590/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
39591/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39592/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
39593/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
39594/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
39595/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
39596/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
39597///
39598/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundss_si32&expand=1374)
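///
/// # Example
///
/// A minimal illustrative sketch (not part of the original documentation):
///
/// ```ignore
/// let a = _mm_set_ss(2.5);
/// let down = unsafe { _mm_cvt_roundss_si32::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a) };
/// let up = unsafe { _mm_cvt_roundss_si32::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a) };
/// assert_eq!((down, up), (2, 3));
/// ```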
39599#[inline]
39600#[target_feature(enable = "avx512f")]
39601#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39602#[cfg_attr(test, assert_instr(vcvtss2si, ROUNDING = 8))]
39603#[rustc_legacy_const_generics(1)]
39604pub unsafe fn _mm_cvt_roundss_si32<const ROUNDING: i32>(a: __m128) -> i32 {
39605 static_assert_rounding!(ROUNDING);
39606 let a: f32x4 = a.as_f32x4();
39607 vcvtss2si(a, ROUNDING)
39608}
39609
39610/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
39611/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39612/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
39613/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
39614/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
39615/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
39616/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
39617///
39618/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundss_i32&expand=1369)
39619#[inline]
39620#[target_feature(enable = "avx512f")]
39621#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39622#[cfg_attr(test, assert_instr(vcvtss2si, ROUNDING = 8))]
39623#[rustc_legacy_const_generics(1)]
39624pub unsafe fn _mm_cvt_roundss_i32<const ROUNDING: i32>(a: __m128) -> i32 {
39625 static_assert_rounding!(ROUNDING);
39626 let a: f32x4 = a.as_f32x4();
39627 vcvtss2si(a, ROUNDING)
39628}
39629
39630/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.\
39631/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39632/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
39633/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
39634/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
39635/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
39636/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
39637///
39638/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundss_u32&expand=1376)
39639#[inline]
39640#[target_feature(enable = "avx512f")]
39641#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39642#[cfg_attr(test, assert_instr(vcvtss2usi, ROUNDING = 8))]
39643#[rustc_legacy_const_generics(1)]
39644pub unsafe fn _mm_cvt_roundss_u32<const ROUNDING: i32>(a: __m128) -> u32 {
39645 static_assert_rounding!(ROUNDING);
39646 let a: f32x4 = a.as_f32x4();
39647 vcvtss2usi(a, ROUNDING)
39648}
39649
39650/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer, and store the result in dst.
39651///
39652/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtss_i32&expand=1893)
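///
/// # Example
///
/// A minimal illustrative sketch (not part of the original documentation); with the
/// default MXCSR rounding mode (round-to-nearest-even), -3.7 converts to -4:
///
/// ```ignore
/// let r = unsafe { _mm_cvtss_i32(_mm_set_ss(-3.7)) };
/// assert_eq!(r, -4);
/// ```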
39653#[inline]
39654#[target_feature(enable = "avx512f")]
39655#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39656#[cfg_attr(test, assert_instr(vcvtss2si))]
pub unsafe fn _mm_cvtss_i32(a: __m128) -> i32 {
    vcvtss2si(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION)
}
39660
39661/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.
39662///
39663/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtss_u32&expand=1901)
39664#[inline]
39665#[target_feature(enable = "avx512f")]
39666#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39667#[cfg_attr(test, assert_instr(vcvtss2usi))]
pub unsafe fn _mm_cvtss_u32(a: __m128) -> u32 {
    vcvtss2usi(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION)
}
39671
39672/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
39673/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39674/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
39675/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
39676/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
39677/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
39678/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
39679///
39680/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundsd_si32&expand=1359)
39681#[inline]
39682#[target_feature(enable = "avx512f")]
39683#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39684#[cfg_attr(test, assert_instr(vcvtsd2si, ROUNDING = 8))]
39685#[rustc_legacy_const_generics(1)]
39686pub unsafe fn _mm_cvt_roundsd_si32<const ROUNDING: i32>(a: __m128d) -> i32 {
39687 static_assert_rounding!(ROUNDING);
39688 let a: f64x2 = a.as_f64x2();
39689 vcvtsd2si(a, ROUNDING)
39690}
39691
/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
39693/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39694/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
39695/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
39696/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
39697/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
39698/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
39699///
39700/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundsd_i32&expand=1357)
39701#[inline]
39702#[target_feature(enable = "avx512f")]
39703#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39704#[cfg_attr(test, assert_instr(vcvtsd2si, ROUNDING = 8))]
39705#[rustc_legacy_const_generics(1)]
39706pub unsafe fn _mm_cvt_roundsd_i32<const ROUNDING: i32>(a: __m128d) -> i32 {
39707 static_assert_rounding!(ROUNDING);
39708 let a: f64x2 = a.as_f64x2();
39709 vcvtsd2si(a, ROUNDING)
39710}
39711
39712/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.\
39713/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39714/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
39715/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
39716/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
39717/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
39718/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
39719///
39720/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=cvt_roundsd_u32&expand=1364)
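///
/// # Example
///
/// A minimal illustrative sketch (not part of the original documentation); with
/// round-to-nearest, the tie at 2.5 goes to the even value 2:
///
/// ```ignore
/// let a = _mm_set_sd(2.5);
/// let r = unsafe {
///     _mm_cvt_roundsd_u32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a)
/// };
/// assert_eq!(r, 2);
/// ```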
39721#[inline]
39722#[target_feature(enable = "avx512f")]
39723#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39724#[cfg_attr(test, assert_instr(vcvtsd2usi, ROUNDING = 8))]
39725#[rustc_legacy_const_generics(1)]
39726pub unsafe fn _mm_cvt_roundsd_u32<const ROUNDING: i32>(a: __m128d) -> u32 {
39727 static_assert_rounding!(ROUNDING);
39728 let a: f64x2 = a.as_f64x2();
39729 vcvtsd2usi(a, ROUNDING)
39730}
39731
39732/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer, and store the result in dst.
39733///
39734/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtsd_i32&expand=1791)
39735#[inline]
39736#[target_feature(enable = "avx512f")]
39737#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39738#[cfg_attr(test, assert_instr(vcvtsd2si))]
pub unsafe fn _mm_cvtsd_i32(a: __m128d) -> i32 {
    vcvtsd2si(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION)
}
39742
39743/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.
39744///
39745/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtsd_u32&expand=1799)
39746#[inline]
39747#[target_feature(enable = "avx512f")]
39748#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39749#[cfg_attr(test, assert_instr(vcvtsd2usi))]
pub unsafe fn _mm_cvtsd_u32(a: __m128d) -> u32 {
    vcvtsd2usi(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION)
}
39753
39754/// Convert the signed 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
39755///
39756/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39757/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
39758/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
39759/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
39760/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
39761/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
39762///
39763/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundi32_ss&expand=1312)
39764#[inline]
39765#[target_feature(enable = "avx512f")]
39766#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39767#[cfg_attr(test, assert_instr(vcvtsi2ss, ROUNDING = 8))]
39768#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm_cvt_roundi32_ss<const ROUNDING: i32>(a: __m128, b: i32) -> __m128 {
    static_assert_rounding!(ROUNDING);
    let a: f32x4 = a.as_f32x4();
    let r: f32x4 = vcvtsi2ss(a, b, ROUNDING);
    transmute(r)
}
39775
39776/// Convert the signed 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
39777///
39778/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39779/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
39780/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
39781/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
39782/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
39783/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
39784///
39785/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundsi32_ss&expand=1366)
39786#[inline]
39787#[target_feature(enable = "avx512f")]
39788#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39789#[cfg_attr(test, assert_instr(vcvtsi2ss, ROUNDING = 8))]
39790#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm_cvt_roundsi32_ss<const ROUNDING: i32>(a: __m128, b: i32) -> __m128 {
    static_assert_rounding!(ROUNDING);
    let a: f32x4 = a.as_f32x4();
    let r: f32x4 = vcvtsi2ss(a, b, ROUNDING);
    transmute(r)
}
39797
39798/// Convert the unsigned 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
39799/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39800/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
39801/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
39802/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
39803/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
39804/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
39805///
39806/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundu32_ss&expand=1378)
39807#[inline]
39808#[target_feature(enable = "avx512f")]
39809#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39810#[cfg_attr(test, assert_instr(vcvtusi2ss, ROUNDING = 8))]
39811#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm_cvt_roundu32_ss<const ROUNDING: i32>(a: __m128, b: u32) -> __m128 {
    static_assert_rounding!(ROUNDING);
    let a: f32x4 = a.as_f32x4();
    let r: f32x4 = vcvtusi2ss(a, b, ROUNDING);
    transmute(r)
}
39818
39819/// Convert the signed 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
39820///
39821/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvti32_ss&expand=1643)
39822#[inline]
39823#[target_feature(enable = "avx512f")]
39824#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39825#[cfg_attr(test, assert_instr(vcvtsi2ss))]
39826pub unsafe fn _mm_cvti32_ss(a: __m128, b: i32) -> __m128 {
39827 let b: f32 = b as f32;
39828 simd_insert!(a, 0, b)
39829}
39830
39831/// Convert the signed 32-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
39832///
39833/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvti32_sd&expand=1642)
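///
/// # Example
///
/// A minimal illustrative sketch (not part of the original documentation):
///
/// ```ignore
/// let a = _mm_set_pd(8.0, 7.0);            // upper element 8.0
/// let r = unsafe { _mm_cvti32_sd(a, -5) }; // lower element becomes -5.0
/// assert_eq!(unsafe { _mm_cvtsd_f64(r) }, -5.0);
/// ```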
39834#[inline]
39835#[target_feature(enable = "avx512f")]
39836#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39837#[cfg_attr(test, assert_instr(vcvtsi2sd))]
39838pub unsafe fn _mm_cvti32_sd(a: __m128d, b: i32) -> __m128d {
39839 let b: f64 = b as f64;
39840 simd_insert!(a, 0, b)
39841}
39842
39843/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
39844/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39845///
39846/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtt_roundss_Si32&expand=1936)
39847#[inline]
39848#[target_feature(enable = "avx512f")]
39849#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39850#[cfg_attr(test, assert_instr(vcvtss2si, SAE = 8))]
39851#[rustc_legacy_const_generics(1)]
39852pub unsafe fn _mm_cvtt_roundss_si32<const SAE: i32>(a: __m128) -> i32 {
39853 static_assert_sae!(SAE);
39854 let a: f32x4 = a.as_f32x4();
39855 vcvtss2si(a, SAE)
39856}
39857
39858/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
39859/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39860///
39861/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtt_roundss_i32&expand=1934)
39862#[inline]
39863#[target_feature(enable = "avx512f")]
39864#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39865#[cfg_attr(test, assert_instr(vcvtss2si, SAE = 8))]
39866#[rustc_legacy_const_generics(1)]
39867pub unsafe fn _mm_cvtt_roundss_i32<const SAE: i32>(a: __m128) -> i32 {
39868 static_assert_sae!(SAE);
39869 let a: f32x4 = a.as_f32x4();
39870 vcvtss2si(a, SAE)
39871}
39872
39873/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.\
39874/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39875///
39876/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtt_roundss_u32&expand=1938)
39877#[inline]
39878#[target_feature(enable = "avx512f")]
39879#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39880#[cfg_attr(test, assert_instr(vcvtss2usi, SAE = 8))]
39881#[rustc_legacy_const_generics(1)]
39882pub unsafe fn _mm_cvtt_roundss_u32<const SAE: i32>(a: __m128) -> u32 {
39883 static_assert_sae!(SAE);
39884 let a: f32x4 = a.as_f32x4();
39885 vcvtss2usi(a, SAE)
39886}
39887
39888/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.
39889///
39890/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttss_i32&expand=2022)
39891#[inline]
39892#[target_feature(enable = "avx512f")]
39893#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39894#[cfg_attr(test, assert_instr(vcvtss2si))]
pub unsafe fn _mm_cvttss_i32(a: __m128) -> i32 {
    vcvtss2si(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION)
}
39898
39899/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.
39900///
39901/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttss_u32&expand=2026)
39902#[inline]
39903#[target_feature(enable = "avx512f")]
39904#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39905#[cfg_attr(test, assert_instr(vcvtss2usi))]
pub unsafe fn _mm_cvttss_u32(a: __m128) -> u32 {
    vcvtss2usi(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION)
}
39909
39910/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
39911/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39912///
39913/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtt_roundsd_si32&expand=1930)
39914#[inline]
39915#[target_feature(enable = "avx512f")]
39916#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39917#[cfg_attr(test, assert_instr(vcvtsd2si, SAE = 8))]
39918#[rustc_legacy_const_generics(1)]
39919pub unsafe fn _mm_cvtt_roundsd_si32<const SAE: i32>(a: __m128d) -> i32 {
39920 static_assert_sae!(SAE);
39921 let a: f64x2 = a.as_f64x2();
39922 vcvtsd2si(a, SAE)
39923}
39924
39925/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
39926/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39927///
39928/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtt_roundsd_i32&expand=1928)
39929#[inline]
39930#[target_feature(enable = "avx512f")]
39931#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39932#[cfg_attr(test, assert_instr(vcvtsd2si, SAE = 8))]
39933#[rustc_legacy_const_generics(1)]
39934pub unsafe fn _mm_cvtt_roundsd_i32<const SAE: i32>(a: __m128d) -> i32 {
39935 static_assert_sae!(SAE);
39936 let a: f64x2 = a.as_f64x2();
39937 vcvtsd2si(a, SAE)
39938}
39939
39940/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.\
39941/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39942///
39943/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtt_roundsd_u32&expand=1932)
39944#[inline]
39945#[target_feature(enable = "avx512f")]
39946#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39947#[cfg_attr(test, assert_instr(vcvtsd2usi, SAE = 8))]
39948#[rustc_legacy_const_generics(1)]
39949pub unsafe fn _mm_cvtt_roundsd_u32<const SAE: i32>(a: __m128d) -> u32 {
39950 static_assert_sae!(SAE);
39951 let a: f64x2 = a.as_f64x2();
39952 vcvtsd2usi(a, SAE)
39953}
39954
39955/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.
39956///
39957/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttsd_i32&expand=2015)
39958#[inline]
39959#[target_feature(enable = "avx512f")]
39960#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39961#[cfg_attr(test, assert_instr(vcvtsd2si))]
pub unsafe fn _mm_cvttsd_i32(a: __m128d) -> i32 {
    vcvtsd2si(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION)
}
39965
39966/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.
39967///
39968/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttsd_u32&expand=2020)
39969#[inline]
39970#[target_feature(enable = "avx512f")]
39971#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39972#[cfg_attr(test, assert_instr(vcvtsd2usi))]
pub unsafe fn _mm_cvttsd_u32(a: __m128d) -> u32 {
    vcvtsd2usi(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION)
}
39976
39977/// Convert the unsigned 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
39978///
39979/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtu32_ss&expand=2032)
39980#[inline]
39981#[target_feature(enable = "avx512f")]
39982#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39983#[cfg_attr(test, assert_instr(vcvtusi2ss))]
39984pub unsafe fn _mm_cvtu32_ss(a: __m128, b: u32) -> __m128 {
39985 let b: f32 = b as f32;
39986 simd_insert!(a, 0, b)
39987}
39988
39989/// Convert the unsigned 32-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
39990///
39991/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtu32_sd&expand=2031)
39992#[inline]
39993#[target_feature(enable = "avx512f")]
39994#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39995#[cfg_attr(test, assert_instr(vcvtusi2sd))]
39996pub unsafe fn _mm_cvtu32_sd(a: __m128d, b: u32) -> __m128d {
39997 let b: f64 = b as f64;
39998 simd_insert!(a, 0, b)
39999}
40000
40001/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and return the boolean result (0 or 1).\
40002/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40003///
40004/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comi_round_ss&expand=1175)
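///
/// # Example
///
/// A minimal illustrative sketch (not part of the original documentation); predicate
/// `1` selects an ordered "less-than" comparison, as in the `_CMP_LT_OS` encoding:
///
/// ```ignore
/// let a = _mm_set_ss(1.0);
/// let b = _mm_set_ss(2.0);
/// let lt = unsafe { _mm_comi_round_ss::<1, _MM_FROUND_CUR_DIRECTION>(a, b) };
/// assert_eq!(lt, 1);
/// ```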
40005#[inline]
40006#[target_feature(enable = "avx512f")]
40007#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40008#[cfg_attr(test, assert_instr(vcmp, IMM5 = 5, SAE = 4))] //should be vcomiss
40009#[rustc_legacy_const_generics(2, 3)]
40010pub unsafe fn _mm_comi_round_ss<const IMM5: i32, const SAE: i32>(a: __m128, b: __m128) -> i32 {
40011 static_assert_uimm_bits!(IMM5, 5);
40012 static_assert_mantissas_sae!(SAE);
40013 let a: f32x4 = a.as_f32x4();
40014 let b: f32x4 = b.as_f32x4();
40015 vcomiss(a, b, IMM5, SAE)
40016}
40017
40018/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and return the boolean result (0 or 1).\
40019/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40020///
40021/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comi_round_sd&expand=1174)
40022#[inline]
40023#[target_feature(enable = "avx512f")]
40024#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40025#[cfg_attr(test, assert_instr(vcmp, IMM5 = 5, SAE = 4))] //should be vcomisd
40026#[rustc_legacy_const_generics(2, 3)]
40027pub unsafe fn _mm_comi_round_sd<const IMM5: i32, const SAE: i32>(a: __m128d, b: __m128d) -> i32 {
40028 static_assert_uimm_bits!(IMM5, 5);
40029 static_assert_mantissas_sae!(SAE);
40030 let a: f64x2 = a.as_f64x2();
40031 let b: f64x2 = b.as_f64x2();
40032 vcomisd(a, b, IMM5, SAE)
40033}
40034
40035/// Equal
40036#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40037pub const _MM_CMPINT_EQ: _MM_CMPINT_ENUM = 0x00;
40038/// Less-than
40039#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40040pub const _MM_CMPINT_LT: _MM_CMPINT_ENUM = 0x01;
40041/// Less-than-or-equal
40042#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40043pub const _MM_CMPINT_LE: _MM_CMPINT_ENUM = 0x02;
40044/// False
40045#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40046pub const _MM_CMPINT_FALSE: _MM_CMPINT_ENUM = 0x03;
40047/// Not-equal
40048#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40049pub const _MM_CMPINT_NE: _MM_CMPINT_ENUM = 0x04;
40050/// Not less-than
40051#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40052pub const _MM_CMPINT_NLT: _MM_CMPINT_ENUM = 0x05;
40053/// Not less-than-or-equal
40054#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40055pub const _MM_CMPINT_NLE: _MM_CMPINT_ENUM = 0x06;
40056/// True
40057#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40058pub const _MM_CMPINT_TRUE: _MM_CMPINT_ENUM = 0x07;
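
// Illustrative sketch (not from the original source): the `_MM_CMPINT_*` operators above
// parameterize the masked integer comparisons, e.g. `_mm512_cmp_epi32_mask`:
//
//     let a = _mm512_set1_epi32(1);
//     let b = _mm512_set1_epi32(2);
//     let m: __mmask16 = unsafe { _mm512_cmp_epi32_mask::<_MM_CMPINT_LT>(a, b) };
//     assert_eq!(m, 0xFFFF); // 1 < 2 in every lane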
40059
40060/// interval [1, 2)
40061#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40062pub const _MM_MANT_NORM_1_2: _MM_MANTISSA_NORM_ENUM = 0x00;
40063/// interval [0.5, 2)
40064#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40065pub const _MM_MANT_NORM_P5_2: _MM_MANTISSA_NORM_ENUM = 0x01;
40066/// interval [0.5, 1)
40067#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40068pub const _MM_MANT_NORM_P5_1: _MM_MANTISSA_NORM_ENUM = 0x02;
40069/// interval [0.75, 1.5)
40070#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40071pub const _MM_MANT_NORM_P75_1P5: _MM_MANTISSA_NORM_ENUM = 0x03;
40072
40073/// sign = sign(SRC)
40074#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40075pub const _MM_MANT_SIGN_SRC: _MM_MANTISSA_SIGN_ENUM = 0x00;
40076/// sign = 0
40077#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40078pub const _MM_MANT_SIGN_ZERO: _MM_MANTISSA_SIGN_ENUM = 0x01;
40079/// DEST = NaN if sign(SRC) = 1
40080#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40081pub const _MM_MANT_SIGN_NAN: _MM_MANTISSA_SIGN_ENUM = 0x02;
40082
40083#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40084pub const _MM_PERM_AAAA: _MM_PERM_ENUM = 0x00;
40085#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40086pub const _MM_PERM_AAAB: _MM_PERM_ENUM = 0x01;
40087#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40088pub const _MM_PERM_AAAC: _MM_PERM_ENUM = 0x02;
40089#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40090pub const _MM_PERM_AAAD: _MM_PERM_ENUM = 0x03;
40091#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40092pub const _MM_PERM_AABA: _MM_PERM_ENUM = 0x04;
40093#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40094pub const _MM_PERM_AABB: _MM_PERM_ENUM = 0x05;
40095#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40096pub const _MM_PERM_AABC: _MM_PERM_ENUM = 0x06;
40097#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40098pub const _MM_PERM_AABD: _MM_PERM_ENUM = 0x07;
40099#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40100pub const _MM_PERM_AACA: _MM_PERM_ENUM = 0x08;
40101#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40102pub const _MM_PERM_AACB: _MM_PERM_ENUM = 0x09;
40103#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40104pub const _MM_PERM_AACC: _MM_PERM_ENUM = 0x0A;
40105#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40106pub const _MM_PERM_AACD: _MM_PERM_ENUM = 0x0B;
40107#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40108pub const _MM_PERM_AADA: _MM_PERM_ENUM = 0x0C;
40109#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40110pub const _MM_PERM_AADB: _MM_PERM_ENUM = 0x0D;
40111#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40112pub const _MM_PERM_AADC: _MM_PERM_ENUM = 0x0E;
40113#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40114pub const _MM_PERM_AADD: _MM_PERM_ENUM = 0x0F;
40115#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40116pub const _MM_PERM_ABAA: _MM_PERM_ENUM = 0x10;
40117#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40118pub const _MM_PERM_ABAB: _MM_PERM_ENUM = 0x11;
40119#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40120pub const _MM_PERM_ABAC: _MM_PERM_ENUM = 0x12;
40121#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40122pub const _MM_PERM_ABAD: _MM_PERM_ENUM = 0x13;
40123#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40124pub const _MM_PERM_ABBA: _MM_PERM_ENUM = 0x14;
40125#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40126pub const _MM_PERM_ABBB: _MM_PERM_ENUM = 0x15;
40127#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40128pub const _MM_PERM_ABBC: _MM_PERM_ENUM = 0x16;
40129#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40130pub const _MM_PERM_ABBD: _MM_PERM_ENUM = 0x17;
40131#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40132pub const _MM_PERM_ABCA: _MM_PERM_ENUM = 0x18;
40133#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40134pub const _MM_PERM_ABCB: _MM_PERM_ENUM = 0x19;
40135#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40136pub const _MM_PERM_ABCC: _MM_PERM_ENUM = 0x1A;
40137#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40138pub const _MM_PERM_ABCD: _MM_PERM_ENUM = 0x1B;
40139#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40140pub const _MM_PERM_ABDA: _MM_PERM_ENUM = 0x1C;
40141#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40142pub const _MM_PERM_ABDB: _MM_PERM_ENUM = 0x1D;
40143#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40144pub const _MM_PERM_ABDC: _MM_PERM_ENUM = 0x1E;
40145#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40146pub const _MM_PERM_ABDD: _MM_PERM_ENUM = 0x1F;
40147#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40148pub const _MM_PERM_ACAA: _MM_PERM_ENUM = 0x20;
40149#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40150pub const _MM_PERM_ACAB: _MM_PERM_ENUM = 0x21;
40151#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40152pub const _MM_PERM_ACAC: _MM_PERM_ENUM = 0x22;
40153#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40154pub const _MM_PERM_ACAD: _MM_PERM_ENUM = 0x23;
40155#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40156pub const _MM_PERM_ACBA: _MM_PERM_ENUM = 0x24;
40157#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40158pub const _MM_PERM_ACBB: _MM_PERM_ENUM = 0x25;
40159#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40160pub const _MM_PERM_ACBC: _MM_PERM_ENUM = 0x26;
40161#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40162pub const _MM_PERM_ACBD: _MM_PERM_ENUM = 0x27;
40163#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40164pub const _MM_PERM_ACCA: _MM_PERM_ENUM = 0x28;
40165#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40166pub const _MM_PERM_ACCB: _MM_PERM_ENUM = 0x29;
40167#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40168pub const _MM_PERM_ACCC: _MM_PERM_ENUM = 0x2A;
40169#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40170pub const _MM_PERM_ACCD: _MM_PERM_ENUM = 0x2B;
40171#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40172pub const _MM_PERM_ACDA: _MM_PERM_ENUM = 0x2C;
40173#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40174pub const _MM_PERM_ACDB: _MM_PERM_ENUM = 0x2D;
40175#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40176pub const _MM_PERM_ACDC: _MM_PERM_ENUM = 0x2E;
40177#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40178pub const _MM_PERM_ACDD: _MM_PERM_ENUM = 0x2F;
40179#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40180pub const _MM_PERM_ADAA: _MM_PERM_ENUM = 0x30;
40181#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40182pub const _MM_PERM_ADAB: _MM_PERM_ENUM = 0x31;
40183#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40184pub const _MM_PERM_ADAC: _MM_PERM_ENUM = 0x32;
40185#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40186pub const _MM_PERM_ADAD: _MM_PERM_ENUM = 0x33;
40187#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40188pub const _MM_PERM_ADBA: _MM_PERM_ENUM = 0x34;
40189#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40190pub const _MM_PERM_ADBB: _MM_PERM_ENUM = 0x35;
40191#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40192pub const _MM_PERM_ADBC: _MM_PERM_ENUM = 0x36;
40193#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40194pub const _MM_PERM_ADBD: _MM_PERM_ENUM = 0x37;
40195#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40196pub const _MM_PERM_ADCA: _MM_PERM_ENUM = 0x38;
40197#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40198pub const _MM_PERM_ADCB: _MM_PERM_ENUM = 0x39;
40199#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40200pub const _MM_PERM_ADCC: _MM_PERM_ENUM = 0x3A;
40201#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40202pub const _MM_PERM_ADCD: _MM_PERM_ENUM = 0x3B;
40203#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40204pub const _MM_PERM_ADDA: _MM_PERM_ENUM = 0x3C;
40205#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40206pub const _MM_PERM_ADDB: _MM_PERM_ENUM = 0x3D;
40207#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40208pub const _MM_PERM_ADDC: _MM_PERM_ENUM = 0x3E;
40209#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40210pub const _MM_PERM_ADDD: _MM_PERM_ENUM = 0x3F;
40211#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40212pub const _MM_PERM_BAAA: _MM_PERM_ENUM = 0x40;
40213#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40214pub const _MM_PERM_BAAB: _MM_PERM_ENUM = 0x41;
40215#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40216pub const _MM_PERM_BAAC: _MM_PERM_ENUM = 0x42;
40217#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40218pub const _MM_PERM_BAAD: _MM_PERM_ENUM = 0x43;
40219#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40220pub const _MM_PERM_BABA: _MM_PERM_ENUM = 0x44;
40221#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40222pub const _MM_PERM_BABB: _MM_PERM_ENUM = 0x45;
40223#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40224pub const _MM_PERM_BABC: _MM_PERM_ENUM = 0x46;
40225#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40226pub const _MM_PERM_BABD: _MM_PERM_ENUM = 0x47;
40227#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40228pub const _MM_PERM_BACA: _MM_PERM_ENUM = 0x48;
40229#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40230pub const _MM_PERM_BACB: _MM_PERM_ENUM = 0x49;
40231#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40232pub const _MM_PERM_BACC: _MM_PERM_ENUM = 0x4A;
40233#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40234pub const _MM_PERM_BACD: _MM_PERM_ENUM = 0x4B;
40235#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40236pub const _MM_PERM_BADA: _MM_PERM_ENUM = 0x4C;
40237#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40238pub const _MM_PERM_BADB: _MM_PERM_ENUM = 0x4D;
40239#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40240pub const _MM_PERM_BADC: _MM_PERM_ENUM = 0x4E;
40241#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40242pub const _MM_PERM_BADD: _MM_PERM_ENUM = 0x4F;
40243#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40244pub const _MM_PERM_BBAA: _MM_PERM_ENUM = 0x50;
40245#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40246pub const _MM_PERM_BBAB: _MM_PERM_ENUM = 0x51;
40247#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40248pub const _MM_PERM_BBAC: _MM_PERM_ENUM = 0x52;
40249#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40250pub const _MM_PERM_BBAD: _MM_PERM_ENUM = 0x53;
40251#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40252pub const _MM_PERM_BBBA: _MM_PERM_ENUM = 0x54;
40253#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40254pub const _MM_PERM_BBBB: _MM_PERM_ENUM = 0x55;
40255#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40256pub const _MM_PERM_BBBC: _MM_PERM_ENUM = 0x56;
40257#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40258pub const _MM_PERM_BBBD: _MM_PERM_ENUM = 0x57;
40259#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40260pub const _MM_PERM_BBCA: _MM_PERM_ENUM = 0x58;
40261#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40262pub const _MM_PERM_BBCB: _MM_PERM_ENUM = 0x59;
40263#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40264pub const _MM_PERM_BBCC: _MM_PERM_ENUM = 0x5A;
40265#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40266pub const _MM_PERM_BBCD: _MM_PERM_ENUM = 0x5B;
40267#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40268pub const _MM_PERM_BBDA: _MM_PERM_ENUM = 0x5C;
40269#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40270pub const _MM_PERM_BBDB: _MM_PERM_ENUM = 0x5D;
40271#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40272pub const _MM_PERM_BBDC: _MM_PERM_ENUM = 0x5E;
40273#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40274pub const _MM_PERM_BBDD: _MM_PERM_ENUM = 0x5F;
40275#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40276pub const _MM_PERM_BCAA: _MM_PERM_ENUM = 0x60;
40277#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40278pub const _MM_PERM_BCAB: _MM_PERM_ENUM = 0x61;
40279#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40280pub const _MM_PERM_BCAC: _MM_PERM_ENUM = 0x62;
40281#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40282pub const _MM_PERM_BCAD: _MM_PERM_ENUM = 0x63;
40283#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40284pub const _MM_PERM_BCBA: _MM_PERM_ENUM = 0x64;
40285#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40286pub const _MM_PERM_BCBB: _MM_PERM_ENUM = 0x65;
40287#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40288pub const _MM_PERM_BCBC: _MM_PERM_ENUM = 0x66;
40289#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40290pub const _MM_PERM_BCBD: _MM_PERM_ENUM = 0x67;
40291#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40292pub const _MM_PERM_BCCA: _MM_PERM_ENUM = 0x68;
40293#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40294pub const _MM_PERM_BCCB: _MM_PERM_ENUM = 0x69;
40295#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40296pub const _MM_PERM_BCCC: _MM_PERM_ENUM = 0x6A;
40297#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40298pub const _MM_PERM_BCCD: _MM_PERM_ENUM = 0x6B;
40299#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40300pub const _MM_PERM_BCDA: _MM_PERM_ENUM = 0x6C;
40301#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40302pub const _MM_PERM_BCDB: _MM_PERM_ENUM = 0x6D;
40303#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40304pub const _MM_PERM_BCDC: _MM_PERM_ENUM = 0x6E;
40305#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40306pub const _MM_PERM_BCDD: _MM_PERM_ENUM = 0x6F;
40307#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40308pub const _MM_PERM_BDAA: _MM_PERM_ENUM = 0x70;
40309#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40310pub const _MM_PERM_BDAB: _MM_PERM_ENUM = 0x71;
40311#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40312pub const _MM_PERM_BDAC: _MM_PERM_ENUM = 0x72;
40313#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40314pub const _MM_PERM_BDAD: _MM_PERM_ENUM = 0x73;
40315#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40316pub const _MM_PERM_BDBA: _MM_PERM_ENUM = 0x74;
40317#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40318pub const _MM_PERM_BDBB: _MM_PERM_ENUM = 0x75;
40319#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40320pub const _MM_PERM_BDBC: _MM_PERM_ENUM = 0x76;
40321#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40322pub const _MM_PERM_BDBD: _MM_PERM_ENUM = 0x77;
40323#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40324pub const _MM_PERM_BDCA: _MM_PERM_ENUM = 0x78;
40325#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40326pub const _MM_PERM_BDCB: _MM_PERM_ENUM = 0x79;
40327#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40328pub const _MM_PERM_BDCC: _MM_PERM_ENUM = 0x7A;
40329#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40330pub const _MM_PERM_BDCD: _MM_PERM_ENUM = 0x7B;
40331#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40332pub const _MM_PERM_BDDA: _MM_PERM_ENUM = 0x7C;
40333#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40334pub const _MM_PERM_BDDB: _MM_PERM_ENUM = 0x7D;
40335#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40336pub const _MM_PERM_BDDC: _MM_PERM_ENUM = 0x7E;
40337#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40338pub const _MM_PERM_BDDD: _MM_PERM_ENUM = 0x7F;
40339#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40340pub const _MM_PERM_CAAA: _MM_PERM_ENUM = 0x80;
40341#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40342pub const _MM_PERM_CAAB: _MM_PERM_ENUM = 0x81;
40343#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40344pub const _MM_PERM_CAAC: _MM_PERM_ENUM = 0x82;
40345#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40346pub const _MM_PERM_CAAD: _MM_PERM_ENUM = 0x83;
40347#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40348pub const _MM_PERM_CABA: _MM_PERM_ENUM = 0x84;
40349#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40350pub const _MM_PERM_CABB: _MM_PERM_ENUM = 0x85;
40351#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40352pub const _MM_PERM_CABC: _MM_PERM_ENUM = 0x86;
40353#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40354pub const _MM_PERM_CABD: _MM_PERM_ENUM = 0x87;
40355#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40356pub const _MM_PERM_CACA: _MM_PERM_ENUM = 0x88;
40357#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40358pub const _MM_PERM_CACB: _MM_PERM_ENUM = 0x89;
40359#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40360pub const _MM_PERM_CACC: _MM_PERM_ENUM = 0x8A;
40361#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40362pub const _MM_PERM_CACD: _MM_PERM_ENUM = 0x8B;
40363#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40364pub const _MM_PERM_CADA: _MM_PERM_ENUM = 0x8C;
40365#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40366pub const _MM_PERM_CADB: _MM_PERM_ENUM = 0x8D;
40367#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40368pub const _MM_PERM_CADC: _MM_PERM_ENUM = 0x8E;
40369#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40370pub const _MM_PERM_CADD: _MM_PERM_ENUM = 0x8F;
40371#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40372pub const _MM_PERM_CBAA: _MM_PERM_ENUM = 0x90;
40373#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40374pub const _MM_PERM_CBAB: _MM_PERM_ENUM = 0x91;
40375#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40376pub const _MM_PERM_CBAC: _MM_PERM_ENUM = 0x92;
40377#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40378pub const _MM_PERM_CBAD: _MM_PERM_ENUM = 0x93;
40379#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40380pub const _MM_PERM_CBBA: _MM_PERM_ENUM = 0x94;
40381#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40382pub const _MM_PERM_CBBB: _MM_PERM_ENUM = 0x95;
40383#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40384pub const _MM_PERM_CBBC: _MM_PERM_ENUM = 0x96;
40385#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40386pub const _MM_PERM_CBBD: _MM_PERM_ENUM = 0x97;
40387#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40388pub const _MM_PERM_CBCA: _MM_PERM_ENUM = 0x98;
40389#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40390pub const _MM_PERM_CBCB: _MM_PERM_ENUM = 0x99;
40391#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40392pub const _MM_PERM_CBCC: _MM_PERM_ENUM = 0x9A;
40393#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40394pub const _MM_PERM_CBCD: _MM_PERM_ENUM = 0x9B;
40395#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40396pub const _MM_PERM_CBDA: _MM_PERM_ENUM = 0x9C;
40397#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40398pub const _MM_PERM_CBDB: _MM_PERM_ENUM = 0x9D;
40399#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40400pub const _MM_PERM_CBDC: _MM_PERM_ENUM = 0x9E;
40401#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40402pub const _MM_PERM_CBDD: _MM_PERM_ENUM = 0x9F;
40403#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40404pub const _MM_PERM_CCAA: _MM_PERM_ENUM = 0xA0;
40405#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40406pub const _MM_PERM_CCAB: _MM_PERM_ENUM = 0xA1;
40407#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40408pub const _MM_PERM_CCAC: _MM_PERM_ENUM = 0xA2;
40409#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40410pub const _MM_PERM_CCAD: _MM_PERM_ENUM = 0xA3;
40411#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40412pub const _MM_PERM_CCBA: _MM_PERM_ENUM = 0xA4;
40413#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40414pub const _MM_PERM_CCBB: _MM_PERM_ENUM = 0xA5;
40415#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40416pub const _MM_PERM_CCBC: _MM_PERM_ENUM = 0xA6;
40417#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40418pub const _MM_PERM_CCBD: _MM_PERM_ENUM = 0xA7;
40419#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40420pub const _MM_PERM_CCCA: _MM_PERM_ENUM = 0xA8;
40421#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40422pub const _MM_PERM_CCCB: _MM_PERM_ENUM = 0xA9;
40423#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40424pub const _MM_PERM_CCCC: _MM_PERM_ENUM = 0xAA;
40425#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40426pub const _MM_PERM_CCCD: _MM_PERM_ENUM = 0xAB;
40427#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40428pub const _MM_PERM_CCDA: _MM_PERM_ENUM = 0xAC;
40429#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40430pub const _MM_PERM_CCDB: _MM_PERM_ENUM = 0xAD;
40431#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40432pub const _MM_PERM_CCDC: _MM_PERM_ENUM = 0xAE;
40433#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40434pub const _MM_PERM_CCDD: _MM_PERM_ENUM = 0xAF;
40435#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40436pub const _MM_PERM_CDAA: _MM_PERM_ENUM = 0xB0;
40437#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40438pub const _MM_PERM_CDAB: _MM_PERM_ENUM = 0xB1;
40439#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40440pub const _MM_PERM_CDAC: _MM_PERM_ENUM = 0xB2;
40441#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40442pub const _MM_PERM_CDAD: _MM_PERM_ENUM = 0xB3;
40443#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40444pub const _MM_PERM_CDBA: _MM_PERM_ENUM = 0xB4;
40445#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40446pub const _MM_PERM_CDBB: _MM_PERM_ENUM = 0xB5;
40447#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40448pub const _MM_PERM_CDBC: _MM_PERM_ENUM = 0xB6;
40449#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40450pub const _MM_PERM_CDBD: _MM_PERM_ENUM = 0xB7;
40451#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40452pub const _MM_PERM_CDCA: _MM_PERM_ENUM = 0xB8;
40453#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40454pub const _MM_PERM_CDCB: _MM_PERM_ENUM = 0xB9;
40455#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40456pub const _MM_PERM_CDCC: _MM_PERM_ENUM = 0xBA;
40457#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40458pub const _MM_PERM_CDCD: _MM_PERM_ENUM = 0xBB;
40459#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40460pub const _MM_PERM_CDDA: _MM_PERM_ENUM = 0xBC;
40461#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40462pub const _MM_PERM_CDDB: _MM_PERM_ENUM = 0xBD;
40463#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40464pub const _MM_PERM_CDDC: _MM_PERM_ENUM = 0xBE;
40465#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40466pub const _MM_PERM_CDDD: _MM_PERM_ENUM = 0xBF;
40467#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40468pub const _MM_PERM_DAAA: _MM_PERM_ENUM = 0xC0;
40469#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40470pub const _MM_PERM_DAAB: _MM_PERM_ENUM = 0xC1;
40471#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40472pub const _MM_PERM_DAAC: _MM_PERM_ENUM = 0xC2;
40473#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40474pub const _MM_PERM_DAAD: _MM_PERM_ENUM = 0xC3;
40475#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40476pub const _MM_PERM_DABA: _MM_PERM_ENUM = 0xC4;
40477#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40478pub const _MM_PERM_DABB: _MM_PERM_ENUM = 0xC5;
40479#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40480pub const _MM_PERM_DABC: _MM_PERM_ENUM = 0xC6;
40481#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40482pub const _MM_PERM_DABD: _MM_PERM_ENUM = 0xC7;
40483#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40484pub const _MM_PERM_DACA: _MM_PERM_ENUM = 0xC8;
40485#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40486pub const _MM_PERM_DACB: _MM_PERM_ENUM = 0xC9;
40487#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40488pub const _MM_PERM_DACC: _MM_PERM_ENUM = 0xCA;
40489#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40490pub const _MM_PERM_DACD: _MM_PERM_ENUM = 0xCB;
40491#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40492pub const _MM_PERM_DADA: _MM_PERM_ENUM = 0xCC;
40493#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40494pub const _MM_PERM_DADB: _MM_PERM_ENUM = 0xCD;
40495#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40496pub const _MM_PERM_DADC: _MM_PERM_ENUM = 0xCE;
40497#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40498pub const _MM_PERM_DADD: _MM_PERM_ENUM = 0xCF;
40499#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40500pub const _MM_PERM_DBAA: _MM_PERM_ENUM = 0xD0;
40501#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40502pub const _MM_PERM_DBAB: _MM_PERM_ENUM = 0xD1;
40503#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40504pub const _MM_PERM_DBAC: _MM_PERM_ENUM = 0xD2;
40505#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40506pub const _MM_PERM_DBAD: _MM_PERM_ENUM = 0xD3;
40507#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40508pub const _MM_PERM_DBBA: _MM_PERM_ENUM = 0xD4;
40509#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40510pub const _MM_PERM_DBBB: _MM_PERM_ENUM = 0xD5;
40511#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40512pub const _MM_PERM_DBBC: _MM_PERM_ENUM = 0xD6;
40513#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40514pub const _MM_PERM_DBBD: _MM_PERM_ENUM = 0xD7;
40515#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40516pub const _MM_PERM_DBCA: _MM_PERM_ENUM = 0xD8;
40517#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40518pub const _MM_PERM_DBCB: _MM_PERM_ENUM = 0xD9;
40519#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40520pub const _MM_PERM_DBCC: _MM_PERM_ENUM = 0xDA;
40521#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40522pub const _MM_PERM_DBCD: _MM_PERM_ENUM = 0xDB;
40523#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40524pub const _MM_PERM_DBDA: _MM_PERM_ENUM = 0xDC;
40525#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40526pub const _MM_PERM_DBDB: _MM_PERM_ENUM = 0xDD;
40527#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40528pub const _MM_PERM_DBDC: _MM_PERM_ENUM = 0xDE;
40529#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40530pub const _MM_PERM_DBDD: _MM_PERM_ENUM = 0xDF;
40531#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40532pub const _MM_PERM_DCAA: _MM_PERM_ENUM = 0xE0;
40533#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40534pub const _MM_PERM_DCAB: _MM_PERM_ENUM = 0xE1;
40535#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40536pub const _MM_PERM_DCAC: _MM_PERM_ENUM = 0xE2;
40537#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40538pub const _MM_PERM_DCAD: _MM_PERM_ENUM = 0xE3;
40539#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40540pub const _MM_PERM_DCBA: _MM_PERM_ENUM = 0xE4;
40541#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40542pub const _MM_PERM_DCBB: _MM_PERM_ENUM = 0xE5;
40543#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40544pub const _MM_PERM_DCBC: _MM_PERM_ENUM = 0xE6;
40545#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40546pub const _MM_PERM_DCBD: _MM_PERM_ENUM = 0xE7;
40547#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40548pub const _MM_PERM_DCCA: _MM_PERM_ENUM = 0xE8;
40549#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40550pub const _MM_PERM_DCCB: _MM_PERM_ENUM = 0xE9;
40551#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40552pub const _MM_PERM_DCCC: _MM_PERM_ENUM = 0xEA;
40553#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40554pub const _MM_PERM_DCCD: _MM_PERM_ENUM = 0xEB;
40555#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40556pub const _MM_PERM_DCDA: _MM_PERM_ENUM = 0xEC;
40557#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40558pub const _MM_PERM_DCDB: _MM_PERM_ENUM = 0xED;
40559#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40560pub const _MM_PERM_DCDC: _MM_PERM_ENUM = 0xEE;
40561#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40562pub const _MM_PERM_DCDD: _MM_PERM_ENUM = 0xEF;
40563#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40564pub const _MM_PERM_DDAA: _MM_PERM_ENUM = 0xF0;
40565#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40566pub const _MM_PERM_DDAB: _MM_PERM_ENUM = 0xF1;
40567#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40568pub const _MM_PERM_DDAC: _MM_PERM_ENUM = 0xF2;
40569#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40570pub const _MM_PERM_DDAD: _MM_PERM_ENUM = 0xF3;
40571#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40572pub const _MM_PERM_DDBA: _MM_PERM_ENUM = 0xF4;
40573#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40574pub const _MM_PERM_DDBB: _MM_PERM_ENUM = 0xF5;
40575#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40576pub const _MM_PERM_DDBC: _MM_PERM_ENUM = 0xF6;
40577#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40578pub const _MM_PERM_DDBD: _MM_PERM_ENUM = 0xF7;
40579#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40580pub const _MM_PERM_DDCA: _MM_PERM_ENUM = 0xF8;
40581#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40582pub const _MM_PERM_DDCB: _MM_PERM_ENUM = 0xF9;
40583#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40584pub const _MM_PERM_DDCC: _MM_PERM_ENUM = 0xFA;
40585#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40586pub const _MM_PERM_DDCD: _MM_PERM_ENUM = 0xFB;
40587#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40588pub const _MM_PERM_DDDA: _MM_PERM_ENUM = 0xFC;
40589#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40590pub const _MM_PERM_DDDB: _MM_PERM_ENUM = 0xFD;
40591#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40592pub const _MM_PERM_DDDC: _MM_PERM_ENUM = 0xFE;
40593#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40594pub const _MM_PERM_DDDD: _MM_PERM_ENUM = 0xFF;
40595
40596#[allow(improper_ctypes)]
40597extern "C" {
40598 #[link_name = "llvm.x86.avx512.pmul.dq.512"]
40599 fn vpmuldq(a: i32x16, b: i32x16) -> i64x8;
40600 #[link_name = "llvm.x86.avx512.pmulu.dq.512"]
40601 fn vpmuludq(a: u32x16, b: u32x16) -> u64x8;
40602
40603 #[link_name = "llvm.x86.avx512.mask.pmaxs.d.512"]
40604 fn vpmaxsd(a: i32x16, b: i32x16) -> i32x16;
40605
40606 #[link_name = "llvm.x86.avx512.mask.pmaxs.q.512"]
40607 fn vpmaxsq(a: i64x8, b: i64x8) -> i64x8;
40608 #[link_name = "llvm.x86.avx512.mask.pmaxs.q.256"]
40609 fn vpmaxsq256(a: i64x4, b: i64x4) -> i64x4;
40610 #[link_name = "llvm.x86.avx512.mask.pmaxs.q.128"]
40611 fn vpmaxsq128(a: i64x2, b: i64x2) -> i64x2;
40612
40613 #[link_name = "llvm.x86.avx512.mask.pmins.d.512"]
40614 fn vpminsd(a: i32x16, b: i32x16) -> i32x16;
40615
40616 #[link_name = "llvm.x86.avx512.mask.pmins.q.512"]
40617 fn vpminsq(a: i64x8, b: i64x8) -> i64x8;
40618 #[link_name = "llvm.x86.avx512.mask.pmins.q.256"]
40619 fn vpminsq256(a: i64x4, b: i64x4) -> i64x4;
40620 #[link_name = "llvm.x86.avx512.mask.pmins.q.128"]
40621 fn vpminsq128(a: i64x2, b: i64x2) -> i64x2;
40622
40623 #[link_name = "llvm.x86.avx512.mask.pmaxu.d.512"]
40624 fn vpmaxud(a: u32x16, b: u32x16) -> u32x16;
40625
40626 #[link_name = "llvm.x86.avx512.mask.pmaxu.q.512"]
40627 fn vpmaxuq(a: u64x8, b: u64x8) -> u64x8;
40628 #[link_name = "llvm.x86.avx512.mask.pmaxu.q.256"]
40629 fn vpmaxuq256(a: u64x4, b: u64x4) -> u64x4;
40630 #[link_name = "llvm.x86.avx512.mask.pmaxu.q.128"]
40631 fn vpmaxuq128(a: u64x2, b: u64x2) -> u64x2;
40632
40633 #[link_name = "llvm.x86.avx512.mask.pminu.d.512"]
40634 fn vpminud(a: u32x16, b: u32x16) -> u32x16;
40635
40636 #[link_name = "llvm.x86.avx512.mask.pminu.q.512"]
40637 fn vpminuq(a: u64x8, b: u64x8) -> u64x8;
40638 #[link_name = "llvm.x86.avx512.mask.pminu.q.256"]
40639 fn vpminuq256(a: u64x4, b: u64x4) -> u64x4;
40640 #[link_name = "llvm.x86.avx512.mask.pminu.q.128"]
40641 fn vpminuq128(a: u64x2, b: u64x2) -> u64x2;
40642
40643 #[link_name = "llvm.x86.avx512.sqrt.ps.512"]
40644 fn vsqrtps(a: f32x16, rounding: i32) -> f32x16;
40645 #[link_name = "llvm.x86.avx512.sqrt.pd.512"]
40646 fn vsqrtpd(a: f64x8, rounding: i32) -> f64x8;
40647
40648 #[link_name = "llvm.fma.v16f32"]
40649 fn vfmadd132ps(a: f32x16, b: f32x16, c: f32x16) -> f32x16;
40650 #[link_name = "llvm.fma.v8f64"]
40651 fn vfmadd132pd(a: f64x8, b: f64x8, c: f64x8) -> f64x8;
40652
40653 #[link_name = "llvm.x86.avx512.vfmadd.ps.512"]
40654 fn vfmadd132psround(a: f32x16, b: f32x16, c: f32x16, rounding: i32) -> f32x16;
40655 #[link_name = "llvm.x86.avx512.vfmadd.pd.512"]
40656 fn vfmadd132pdround(a: f64x8, b: f64x8, c: f64x8, rounding: i32) -> f64x8;
40657
40658 #[link_name = "llvm.x86.avx512.vfmaddsub.ps.512"]
40659 fn vfmaddsub213ps(a: f32x16, b: f32x16, c: f32x16, d: i32) -> f32x16; //from clang
40660 #[link_name = "llvm.x86.avx512.vfmaddsub.pd.512"]
40661 fn vfmaddsub213pd(a: f64x8, b: f64x8, c: f64x8, d: i32) -> f64x8; //from clang
40662
40663 #[link_name = "llvm.x86.avx512.add.ps.512"]
40664 fn vaddps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
40665 #[link_name = "llvm.x86.avx512.add.pd.512"]
40666 fn vaddpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
40667 #[link_name = "llvm.x86.avx512.sub.ps.512"]
40668 fn vsubps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
40669 #[link_name = "llvm.x86.avx512.sub.pd.512"]
40670 fn vsubpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
40671 #[link_name = "llvm.x86.avx512.mul.ps.512"]
40672 fn vmulps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
40673 #[link_name = "llvm.x86.avx512.mul.pd.512"]
40674 fn vmulpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
40675 #[link_name = "llvm.x86.avx512.div.ps.512"]
40676 fn vdivps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
40677 #[link_name = "llvm.x86.avx512.div.pd.512"]
40678 fn vdivpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
40679
40680 #[link_name = "llvm.x86.avx512.max.ps.512"]
40681 fn vmaxps(a: f32x16, b: f32x16, sae: i32) -> f32x16;
40682 #[link_name = "llvm.x86.avx512.max.pd.512"]
40683 fn vmaxpd(a: f64x8, b: f64x8, sae: i32) -> f64x8;
40684 #[link_name = "llvm.x86.avx512.min.ps.512"]
40685 fn vminps(a: f32x16, b: f32x16, sae: i32) -> f32x16;
40686 #[link_name = "llvm.x86.avx512.min.pd.512"]
40687 fn vminpd(a: f64x8, b: f64x8, sae: i32) -> f64x8;
40688
40689 #[link_name = "llvm.x86.avx512.mask.getexp.ps.512"]
40690 fn vgetexpps(a: f32x16, src: f32x16, m: u16, sae: i32) -> f32x16;
40691
40692 #[link_name = "llvm.x86.avx512.mask.getexp.ps.256"]
40693 fn vgetexpps256(a: f32x8, src: f32x8, m: u8) -> f32x8;
40694 #[link_name = "llvm.x86.avx512.mask.getexp.ps.128"]
40695 fn vgetexpps128(a: f32x4, src: f32x4, m: u8) -> f32x4;
40696
40697 #[link_name = "llvm.x86.avx512.mask.getexp.pd.512"]
40698 fn vgetexppd(a: f64x8, src: f64x8, m: u8, sae: i32) -> f64x8;
40699 #[link_name = "llvm.x86.avx512.mask.getexp.pd.256"]
40700 fn vgetexppd256(a: f64x4, src: f64x4, m: u8) -> f64x4;
40701 #[link_name = "llvm.x86.avx512.mask.getexp.pd.128"]
40702 fn vgetexppd128(a: f64x2, src: f64x2, m: u8) -> f64x2;
40703
40704 #[link_name = "llvm.x86.avx512.mask.rndscale.ps.512"]
40705 fn vrndscaleps(a: f32x16, imm8: i32, src: f32x16, mask: u16, sae: i32) -> f32x16;
40706 #[link_name = "llvm.x86.avx512.mask.rndscale.ps.256"]
40707 fn vrndscaleps256(a: f32x8, imm8: i32, src: f32x8, mask: u8) -> f32x8;
40708 #[link_name = "llvm.x86.avx512.mask.rndscale.ps.128"]
40709 fn vrndscaleps128(a: f32x4, imm8: i32, src: f32x4, mask: u8) -> f32x4;
40710
40711 #[link_name = "llvm.x86.avx512.mask.rndscale.pd.512"]
40712 fn vrndscalepd(a: f64x8, imm8: i32, src: f64x8, mask: u8, sae: i32) -> f64x8;
40713 #[link_name = "llvm.x86.avx512.mask.rndscale.pd.256"]
40714 fn vrndscalepd256(a: f64x4, imm8: i32, src: f64x4, mask: u8) -> f64x4;
40715 #[link_name = "llvm.x86.avx512.mask.rndscale.pd.128"]
40716 fn vrndscalepd128(a: f64x2, imm8: i32, src: f64x2, mask: u8) -> f64x2;
40717
40718 #[link_name = "llvm.x86.avx512.mask.scalef.ps.512"]
40719 fn vscalefps(a: f32x16, b: f32x16, src: f32x16, mask: u16, rounding: i32) -> f32x16;
40720 #[link_name = "llvm.x86.avx512.mask.scalef.ps.256"]
40721 fn vscalefps256(a: f32x8, b: f32x8, src: f32x8, mask: u8) -> f32x8;
40722 #[link_name = "llvm.x86.avx512.mask.scalef.ps.128"]
40723 fn vscalefps128(a: f32x4, b: f32x4, src: f32x4, mask: u8) -> f32x4;
40724
40725 #[link_name = "llvm.x86.avx512.mask.scalef.pd.512"]
40726 fn vscalefpd(a: f64x8, b: f64x8, src: f64x8, mask: u8, rounding: i32) -> f64x8;
40727 #[link_name = "llvm.x86.avx512.mask.scalef.pd.256"]
40728 fn vscalefpd256(a: f64x4, b: f64x4, src: f64x4, mask: u8) -> f64x4;
40729 #[link_name = "llvm.x86.avx512.mask.scalef.pd.128"]
40730 fn vscalefpd128(a: f64x2, b: f64x2, src: f64x2, mask: u8) -> f64x2;
40731
40732 #[link_name = "llvm.x86.avx512.mask.fixupimm.ps.512"]
40733 fn vfixupimmps(a: f32x16, b: f32x16, c: i32x16, imm8: i32, mask: u16, sae: i32) -> f32x16;
40734 #[link_name = "llvm.x86.avx512.mask.fixupimm.ps.256"]
40735 fn vfixupimmps256(a: f32x8, b: f32x8, c: i32x8, imm8: i32, mask: u8) -> f32x8;
40736 #[link_name = "llvm.x86.avx512.mask.fixupimm.ps.128"]
40737 fn vfixupimmps128(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8) -> f32x4;
40738
40739 #[link_name = "llvm.x86.avx512.mask.fixupimm.pd.512"]
40740 fn vfixupimmpd(a: f64x8, b: f64x8, c: i64x8, imm8: i32, mask: u8, sae: i32) -> f64x8;
40741 #[link_name = "llvm.x86.avx512.mask.fixupimm.pd.256"]
40742 fn vfixupimmpd256(a: f64x4, b: f64x4, c: i64x4, imm8: i32, mask: u8) -> f64x4;
40743 #[link_name = "llvm.x86.avx512.mask.fixupimm.pd.128"]
40744 fn vfixupimmpd128(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8) -> f64x2;
40745
40746 #[link_name = "llvm.x86.avx512.maskz.fixupimm.ps.512"]
40747 fn vfixupimmpsz(a: f32x16, b: f32x16, c: i32x16, imm8: i32, mask: u16, sae: i32) -> f32x16;
40748 #[link_name = "llvm.x86.avx512.maskz.fixupimm.ps.256"]
40749 fn vfixupimmpsz256(a: f32x8, b: f32x8, c: i32x8, imm8: i32, mask: u8) -> f32x8;
40750 #[link_name = "llvm.x86.avx512.maskz.fixupimm.ps.128"]
40751 fn vfixupimmpsz128(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8) -> f32x4;
40752
40753 #[link_name = "llvm.x86.avx512.maskz.fixupimm.pd.512"]
40754 fn vfixupimmpdz(a: f64x8, b: f64x8, c: i64x8, imm8: i32, mask: u8, sae: i32) -> f64x8;
40755 #[link_name = "llvm.x86.avx512.maskz.fixupimm.pd.256"]
40756 fn vfixupimmpdz256(a: f64x4, b: f64x4, c: i64x4, imm8: i32, mask: u8) -> f64x4;
40757 #[link_name = "llvm.x86.avx512.maskz.fixupimm.pd.128"]
40758 fn vfixupimmpdz128(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8) -> f64x2;
40759
40760 #[link_name = "llvm.x86.avx512.pternlog.d.512"]
40761 fn vpternlogd(a: i32x16, b: i32x16, c: i32x16, imm8: i32) -> i32x16;
40762 #[link_name = "llvm.x86.avx512.pternlog.d.256"]
40763 fn vpternlogd256(a: i32x8, b: i32x8, c: i32x8, imm8: i32) -> i32x8;
40764 #[link_name = "llvm.x86.avx512.pternlog.d.128"]
40765 fn vpternlogd128(a: i32x4, b: i32x4, c: i32x4, imm8: i32) -> i32x4;
40766
40767 #[link_name = "llvm.x86.avx512.pternlog.q.512"]
40768 fn vpternlogq(a: i64x8, b: i64x8, c: i64x8, imm8: i32) -> i64x8;
40769 #[link_name = "llvm.x86.avx512.pternlog.q.256"]
40770 fn vpternlogq256(a: i64x4, b: i64x4, c: i64x4, imm8: i32) -> i64x4;
40771 #[link_name = "llvm.x86.avx512.pternlog.q.128"]
40772 fn vpternlogq128(a: i64x2, b: i64x2, c: i64x2, imm8: i32) -> i64x2;
40773
40774 #[link_name = "llvm.x86.avx512.mask.getmant.ps.512"]
40775 fn vgetmantps(a: f32x16, mantissas: i32, src: f32x16, m: u16, sae: i32) -> f32x16;
40776 #[link_name = "llvm.x86.avx512.mask.getmant.ps.256"]
40777 fn vgetmantps256(a: f32x8, mantissas: i32, src: f32x8, m: u8) -> f32x8;
40778 #[link_name = "llvm.x86.avx512.mask.getmant.ps.128"]
40779 fn vgetmantps128(a: f32x4, mantissas: i32, src: f32x4, m: u8) -> f32x4;
40780
40781 #[link_name = "llvm.x86.avx512.mask.getmant.pd.512"]
40782 fn vgetmantpd(a: f64x8, mantissas: i32, src: f64x8, m: u8, sae: i32) -> f64x8;
40783 #[link_name = "llvm.x86.avx512.mask.getmant.pd.256"]
40784 fn vgetmantpd256(a: f64x4, mantissas: i32, src: f64x4, m: u8) -> f64x4;
40785 #[link_name = "llvm.x86.avx512.mask.getmant.pd.128"]
40786 fn vgetmantpd128(a: f64x2, mantissas: i32, src: f64x2, m: u8) -> f64x2;
40787
40788 #[link_name = "llvm.x86.avx512.rcp14.ps.512"]
40789 fn vrcp14ps(a: f32x16, src: f32x16, m: u16) -> f32x16;
40790 #[link_name = "llvm.x86.avx512.rcp14.ps.256"]
40791 fn vrcp14ps256(a: f32x8, src: f32x8, m: u8) -> f32x8;
40792 #[link_name = "llvm.x86.avx512.rcp14.ps.128"]
40793 fn vrcp14ps128(a: f32x4, src: f32x4, m: u8) -> f32x4;
40794
40795 #[link_name = "llvm.x86.avx512.rcp14.pd.512"]
40796 fn vrcp14pd(a: f64x8, src: f64x8, m: u8) -> f64x8;
40797 #[link_name = "llvm.x86.avx512.rcp14.pd.256"]
40798 fn vrcp14pd256(a: f64x4, src: f64x4, m: u8) -> f64x4;
40799 #[link_name = "llvm.x86.avx512.rcp14.pd.128"]
40800 fn vrcp14pd128(a: f64x2, src: f64x2, m: u8) -> f64x2;
40801
40802 #[link_name = "llvm.x86.avx512.rsqrt14.ps.512"]
40803 fn vrsqrt14ps(a: f32x16, src: f32x16, m: u16) -> f32x16;
40804 #[link_name = "llvm.x86.avx512.rsqrt14.ps.256"]
40805 fn vrsqrt14ps256(a: f32x8, src: f32x8, m: u8) -> f32x8;
40806 #[link_name = "llvm.x86.avx512.rsqrt14.ps.128"]
40807 fn vrsqrt14ps128(a: f32x4, src: f32x4, m: u8) -> f32x4;
40808
40809 #[link_name = "llvm.x86.avx512.rsqrt14.pd.512"]
40810 fn vrsqrt14pd(a: f64x8, src: f64x8, m: u8) -> f64x8;
40811 #[link_name = "llvm.x86.avx512.rsqrt14.pd.256"]
40812 fn vrsqrt14pd256(a: f64x4, src: f64x4, m: u8) -> f64x4;
40813 #[link_name = "llvm.x86.avx512.rsqrt14.pd.128"]
40814 fn vrsqrt14pd128(a: f64x2, src: f64x2, m: u8) -> f64x2;
40815
40816 #[link_name = "llvm.x86.avx512.mask.cvtps2dq.512"]
40817 fn vcvtps2dq(a: f32x16, src: i32x16, mask: u16, rounding: i32) -> i32x16;
40818
40819 #[link_name = "llvm.x86.avx512.mask.cvtps2udq.512"]
40820 fn vcvtps2udq(a: f32x16, src: u32x16, mask: u16, rounding: i32) -> u32x16;
40821 #[link_name = "llvm.x86.avx512.mask.cvtps2udq.256"]
40822 fn vcvtps2udq256(a: f32x8, src: u32x8, mask: u8) -> u32x8;
40823 #[link_name = "llvm.x86.avx512.mask.cvtps2udq.128"]
40824 fn vcvtps2udq128(a: f32x4, src: u32x4, mask: u8) -> u32x4;
40825
40826 #[link_name = "llvm.x86.avx512.mask.cvtps2pd.512"]
40827 fn vcvtps2pd(a: f32x8, src: f64x8, mask: u8, sae: i32) -> f64x8;
40828 #[link_name = "llvm.x86.avx512.mask.cvtpd2ps.512"]
40829 fn vcvtpd2ps(a: f64x8, src: f32x8, mask: u8, rounding: i32) -> f32x8;
40830
40831 #[link_name = "llvm.x86.avx512.mask.cvtpd2dq.512"]
40832 fn vcvtpd2dq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> i32x8;
40833
40834 #[link_name = "llvm.x86.avx512.mask.cvtpd2udq.512"]
40835 fn vcvtpd2udq(a: f64x8, src: u32x8, mask: u8, rounding: i32) -> u32x8;
40836 #[link_name = "llvm.x86.avx512.mask.cvtpd2udq.256"]
40837 fn vcvtpd2udq256(a: f64x4, src: u32x4, mask: u8) -> u32x4;
40838 #[link_name = "llvm.x86.avx512.mask.cvtpd2udq.128"]
40839 fn vcvtpd2udq128(a: f64x2, src: u32x4, mask: u8) -> u32x4;
40840
40841 #[link_name = "llvm.x86.avx512.sitofp.round.v16f32.v16i32"]
40842 fn vcvtdq2ps(a: i32x16, rounding: i32) -> f32x16;
40843 #[link_name = "llvm.x86.avx512.uitofp.round.v16f32.v16i32"]
40844 fn vcvtudq2ps(a: u32x16, rounding: i32) -> f32x16;
40845
40846 #[link_name = "llvm.x86.avx512.mask.vcvtps2ph.512"]
40847 fn vcvtps2ph(a: f32x16, sae: i32, src: i16x16, mask: u16) -> i16x16;
40848 #[link_name = "llvm.x86.avx512.mask.vcvtps2ph.256"]
40849 fn vcvtps2ph256(a: f32x8, sae: i32, src: i16x8, mask: u8) -> i16x8;
40850 #[link_name = "llvm.x86.avx512.mask.vcvtps2ph.128"]
40851 fn vcvtps2ph128(a: f32x4, sae: i32, src: i16x8, mask: u8) -> i16x8;
40852
40853 #[link_name = "llvm.x86.avx512.mask.vcvtph2ps.512"]
40854 fn vcvtph2ps(a: i16x16, src: f32x16, mask: u16, sae: i32) -> f32x16;
40855
40856 #[link_name = "llvm.x86.avx512.mask.cvttps2dq.512"]
40857 fn vcvttps2dq(a: f32x16, src: i32x16, mask: u16, rounding: i32) -> i32x16;
40858 #[link_name = "llvm.x86.avx512.mask.cvttps2dq.256"]
40859 fn vcvttps2dq256(a: f32x8, src: i32x8, mask: u8) -> i32x8;
40860 #[link_name = "llvm.x86.avx512.mask.cvttps2dq.128"]
40861 fn vcvttps2dq128(a: f32x4, src: i32x4, mask: u8) -> i32x4;
40862
40863 #[link_name = "llvm.x86.avx512.mask.cvttps2udq.512"]
40864 fn vcvttps2udq(a: f32x16, src: u32x16, mask: u16, rounding: i32) -> u32x16;
40865 #[link_name = "llvm.x86.avx512.mask.cvttps2udq.256"]
40866 fn vcvttps2udq256(a: f32x8, src: u32x8, mask: u8) -> u32x8;
40867 #[link_name = "llvm.x86.avx512.mask.cvttps2udq.128"]
40868 fn vcvttps2udq128(a: f32x4, src: u32x4, mask: u8) -> u32x4;
40869
40870 #[link_name = "llvm.x86.avx512.mask.cvttpd2dq.512"]
40871 fn vcvttpd2dq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> i32x8;
40872 #[link_name = "llvm.x86.avx512.mask.cvttpd2dq.256"]
40873 fn vcvttpd2dq256(a: f64x4, src: i32x4, mask: u8) -> i32x4;
40874 #[link_name = "llvm.x86.avx512.mask.cvttpd2dq.128"]
40875 fn vcvttpd2dq128(a: f64x2, src: i32x4, mask: u8) -> i32x4;
40876
40877 #[link_name = "llvm.x86.avx512.mask.cvttpd2udq.512"]
40878 fn vcvttpd2udq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> u32x8;
40879 #[link_name = "llvm.x86.avx512.mask.cvttpd2udq.256"]
40880 fn vcvttpd2udq256(a: f64x4, src: i32x4, mask: u8) -> u32x4;
40881 #[link_name = "llvm.x86.avx512.mask.cvttpd2udq.128"]
40882 fn vcvttpd2udq128(a: f64x2, src: i32x4, mask: u8) -> u32x4;
40883
40884 #[link_name = "llvm.x86.avx512.mask.pmov.dw.128"]
40885 fn vpmovdw128(a: i32x4, src: i16x8, mask: u8) -> i16x8;
40886 #[link_name = "llvm.x86.avx512.mask.pmov.db.256"]
40887 fn vpmovdb256(a: i32x8, src: i8x16, mask: u8) -> i8x16;
40888 #[link_name = "llvm.x86.avx512.mask.pmov.db.128"]
40889 fn vpmovdb128(a: i32x4, src: i8x16, mask: u8) -> i8x16;
40890
40891 #[link_name = "llvm.x86.avx512.mask.pmov.qw.256"]
40892 fn vpmovqw256(a: i64x4, src: i16x8, mask: u8) -> i16x8;
40893 #[link_name = "llvm.x86.avx512.mask.pmov.qw.128"]
40894 fn vpmovqw128(a: i64x2, src: i16x8, mask: u8) -> i16x8;
40895 #[link_name = "llvm.x86.avx512.mask.pmov.qb.256"]
40896 fn vpmovqb256(a: i64x4, src: i8x16, mask: u8) -> i8x16;
40897 #[link_name = "llvm.x86.avx512.mask.pmov.qb.128"]
40898 fn vpmovqb128(a: i64x2, src: i8x16, mask: u8) -> i8x16;
40899 #[link_name = "llvm.x86.avx512.mask.pmov.qd.128"]
40900 fn vpmovqd128(a: i64x2, src: i32x4, mask: u8) -> i32x4;
40901
40902 #[link_name = "llvm.x86.avx512.mask.pmov.dw.mem.512"]
40903 fn vpmovdwmem(mem_addr: *mut i8, a: i32x16, mask: u16);
40904 #[link_name = "llvm.x86.avx512.mask.pmov.dw.mem.256"]
40905 fn vpmovdwmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
40906 #[link_name = "llvm.x86.avx512.mask.pmov.dw.mem.128"]
40907 fn vpmovdwmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
40908
40909 #[link_name = "llvm.x86.avx512.mask.pmovs.dw.mem.512"]
40910 fn vpmovsdwmem(mem_addr: *mut i8, a: i32x16, mask: u16);
40911 #[link_name = "llvm.x86.avx512.mask.pmovs.dw.mem.256"]
40912 fn vpmovsdwmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
40913 #[link_name = "llvm.x86.avx512.mask.pmovs.dw.mem.128"]
40914 fn vpmovsdwmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
40915
40916 #[link_name = "llvm.x86.avx512.mask.pmovus.dw.mem.512"]
40917 fn vpmovusdwmem(mem_addr: *mut i8, a: i32x16, mask: u16);
40918 #[link_name = "llvm.x86.avx512.mask.pmovus.dw.mem.256"]
40919 fn vpmovusdwmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
40920 #[link_name = "llvm.x86.avx512.mask.pmovus.dw.mem.128"]
40921 fn vpmovusdwmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
40922
40923 #[link_name = "llvm.x86.avx512.mask.pmov.db.mem.512"]
40924 fn vpmovdbmem(mem_addr: *mut i8, a: i32x16, mask: u16);
40925 #[link_name = "llvm.x86.avx512.mask.pmov.db.mem.256"]
40926 fn vpmovdbmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
40927 #[link_name = "llvm.x86.avx512.mask.pmov.db.mem.128"]
40928 fn vpmovdbmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
40929
40930 #[link_name = "llvm.x86.avx512.mask.pmovs.db.mem.512"]
40931 fn vpmovsdbmem(mem_addr: *mut i8, a: i32x16, mask: u16);
40932 #[link_name = "llvm.x86.avx512.mask.pmovs.db.mem.256"]
40933 fn vpmovsdbmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
40934 #[link_name = "llvm.x86.avx512.mask.pmovs.db.mem.128"]
40935 fn vpmovsdbmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
40936
40937 #[link_name = "llvm.x86.avx512.mask.pmovus.db.mem.512"]
40938 fn vpmovusdbmem(mem_addr: *mut i8, a: i32x16, mask: u16);
40939 #[link_name = "llvm.x86.avx512.mask.pmovus.db.mem.256"]
40940 fn vpmovusdbmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
40941 #[link_name = "llvm.x86.avx512.mask.pmovus.db.mem.128"]
40942 fn vpmovusdbmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
40943
40944 #[link_name = "llvm.x86.avx512.mask.pmov.qw.mem.512"]
40945 fn vpmovqwmem(mem_addr: *mut i8, a: i64x8, mask: u8);
40946 #[link_name = "llvm.x86.avx512.mask.pmov.qw.mem.256"]
40947 fn vpmovqwmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
40948 #[link_name = "llvm.x86.avx512.mask.pmov.qw.mem.128"]
40949 fn vpmovqwmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
40950
40951 #[link_name = "llvm.x86.avx512.mask.pmovs.qw.mem.512"]
40952 fn vpmovsqwmem(mem_addr: *mut i8, a: i64x8, mask: u8);
40953 #[link_name = "llvm.x86.avx512.mask.pmovs.qw.mem.256"]
40954 fn vpmovsqwmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
40955 #[link_name = "llvm.x86.avx512.mask.pmovs.qw.mem.128"]
40956 fn vpmovsqwmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
40957
40958 #[link_name = "llvm.x86.avx512.mask.pmovus.qw.mem.512"]
40959 fn vpmovusqwmem(mem_addr: *mut i8, a: i64x8, mask: u8);
40960 #[link_name = "llvm.x86.avx512.mask.pmovus.qw.mem.256"]
40961 fn vpmovusqwmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
40962 #[link_name = "llvm.x86.avx512.mask.pmovus.qw.mem.128"]
40963 fn vpmovusqwmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
40964
40965 #[link_name = "llvm.x86.avx512.mask.pmov.qb.mem.512"]
40966 fn vpmovqbmem(mem_addr: *mut i8, a: i64x8, mask: u8);
40967 #[link_name = "llvm.x86.avx512.mask.pmov.qb.mem.256"]
40968 fn vpmovqbmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
40969 #[link_name = "llvm.x86.avx512.mask.pmov.qb.mem.128"]
40970 fn vpmovqbmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
40971
40972 #[link_name = "llvm.x86.avx512.mask.pmovs.qb.mem.512"]
40973 fn vpmovsqbmem(mem_addr: *mut i8, a: i64x8, mask: u8);
40974 #[link_name = "llvm.x86.avx512.mask.pmovs.qb.mem.256"]
40975 fn vpmovsqbmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
40976 #[link_name = "llvm.x86.avx512.mask.pmovs.qb.mem.128"]
40977 fn vpmovsqbmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
40978
40979 #[link_name = "llvm.x86.avx512.mask.pmovus.qb.mem.512"]
40980 fn vpmovusqbmem(mem_addr: *mut i8, a: i64x8, mask: u8);
40981 #[link_name = "llvm.x86.avx512.mask.pmovus.qb.mem.256"]
40982 fn vpmovusqbmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
40983 #[link_name = "llvm.x86.avx512.mask.pmovus.qb.mem.128"]
40984 fn vpmovusqbmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
40985
40986 #[link_name = "llvm.x86.avx512.mask.pmov.qd.mem.512"]
40987 fn vpmovqdmem(mem_addr: *mut i8, a: i64x8, mask: u8);
40988 #[link_name = "llvm.x86.avx512.mask.pmov.qd.mem.256"]
40989 fn vpmovqdmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
40990 #[link_name = "llvm.x86.avx512.mask.pmov.qd.mem.128"]
40991 fn vpmovqdmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
40992
40993 #[link_name = "llvm.x86.avx512.mask.pmovs.qd.mem.512"]
40994 fn vpmovsqdmem(mem_addr: *mut i8, a: i64x8, mask: u8);
40995 #[link_name = "llvm.x86.avx512.mask.pmovs.qd.mem.256"]
40996 fn vpmovsqdmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
40997 #[link_name = "llvm.x86.avx512.mask.pmovs.qd.mem.128"]
40998 fn vpmovsqdmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
40999
41000 #[link_name = "llvm.x86.avx512.mask.pmovus.qd.mem.512"]
41001 fn vpmovusqdmem(mem_addr: *mut i8, a: i64x8, mask: u8);
41002 #[link_name = "llvm.x86.avx512.mask.pmovus.qd.mem.256"]
41003 fn vpmovusqdmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
41004 #[link_name = "llvm.x86.avx512.mask.pmovus.qd.mem.128"]
41005 fn vpmovusqdmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
41006
41007 #[link_name = "llvm.x86.avx512.mask.pmov.qb.512"]
41008 fn vpmovqb(a: i64x8, src: i8x16, mask: u8) -> i8x16;
41009
41010 #[link_name = "llvm.x86.avx512.mask.pmovs.dw.512"]
41011 fn vpmovsdw(a: i32x16, src: i16x16, mask: u16) -> i16x16;
41012 #[link_name = "llvm.x86.avx512.mask.pmovs.dw.256"]
41013 fn vpmovsdw256(a: i32x8, src: i16x8, mask: u8) -> i16x8;
41014 #[link_name = "llvm.x86.avx512.mask.pmovs.dw.128"]
41015 fn vpmovsdw128(a: i32x4, src: i16x8, mask: u8) -> i16x8;
41016
41017 #[link_name = "llvm.x86.avx512.mask.pmovs.db.512"]
41018 fn vpmovsdb(a: i32x16, src: i8x16, mask: u16) -> i8x16;
41019 #[link_name = "llvm.x86.avx512.mask.pmovs.db.256"]
41020 fn vpmovsdb256(a: i32x8, src: i8x16, mask: u8) -> i8x16;
41021 #[link_name = "llvm.x86.avx512.mask.pmovs.db.128"]
41022 fn vpmovsdb128(a: i32x4, src: i8x16, mask: u8) -> i8x16;
41023
41024 #[link_name = "llvm.x86.avx512.mask.pmovs.qd.512"]
41025 fn vpmovsqd(a: i64x8, src: i32x8, mask: u8) -> i32x8;
41026 #[link_name = "llvm.x86.avx512.mask.pmovs.qd.256"]
41027 fn vpmovsqd256(a: i64x4, src: i32x4, mask: u8) -> i32x4;
41028 #[link_name = "llvm.x86.avx512.mask.pmovs.qd.128"]
41029 fn vpmovsqd128(a: i64x2, src: i32x4, mask: u8) -> i32x4;
41030
41031 #[link_name = "llvm.x86.avx512.mask.pmovs.qw.512"]
41032 fn vpmovsqw(a: i64x8, src: i16x8, mask: u8) -> i16x8;
41033 #[link_name = "llvm.x86.avx512.mask.pmovs.qw.256"]
41034 fn vpmovsqw256(a: i64x4, src: i16x8, mask: u8) -> i16x8;
41035 #[link_name = "llvm.x86.avx512.mask.pmovs.qw.128"]
41036 fn vpmovsqw128(a: i64x2, src: i16x8, mask: u8) -> i16x8;
41037
41038 #[link_name = "llvm.x86.avx512.mask.pmovs.qb.512"]
41039 fn vpmovsqb(a: i64x8, src: i8x16, mask: u8) -> i8x16;
41040 #[link_name = "llvm.x86.avx512.mask.pmovs.qb.256"]
41041 fn vpmovsqb256(a: i64x4, src: i8x16, mask: u8) -> i8x16;
41042 #[link_name = "llvm.x86.avx512.mask.pmovs.qb.128"]
41043 fn vpmovsqb128(a: i64x2, src: i8x16, mask: u8) -> i8x16;
41044
41045 #[link_name = "llvm.x86.avx512.mask.pmovus.dw.512"]
41046 fn vpmovusdw(a: u32x16, src: u16x16, mask: u16) -> u16x16;
41047 #[link_name = "llvm.x86.avx512.mask.pmovus.dw.256"]
41048 fn vpmovusdw256(a: u32x8, src: u16x8, mask: u8) -> u16x8;
41049 #[link_name = "llvm.x86.avx512.mask.pmovus.dw.128"]
41050 fn vpmovusdw128(a: u32x4, src: u16x8, mask: u8) -> u16x8;
41051
41052 #[link_name = "llvm.x86.avx512.mask.pmovus.db.512"]
41053 fn vpmovusdb(a: u32x16, src: u8x16, mask: u16) -> u8x16;
41054 #[link_name = "llvm.x86.avx512.mask.pmovus.db.256"]
41055 fn vpmovusdb256(a: u32x8, src: u8x16, mask: u8) -> u8x16;
41056 #[link_name = "llvm.x86.avx512.mask.pmovus.db.128"]
41057 fn vpmovusdb128(a: u32x4, src: u8x16, mask: u8) -> u8x16;
41058
41059 #[link_name = "llvm.x86.avx512.mask.pmovus.qd.512"]
41060 fn vpmovusqd(a: u64x8, src: u32x8, mask: u8) -> u32x8;
41061 #[link_name = "llvm.x86.avx512.mask.pmovus.qd.256"]
41062 fn vpmovusqd256(a: u64x4, src: u32x4, mask: u8) -> u32x4;
41063 #[link_name = "llvm.x86.avx512.mask.pmovus.qd.128"]
41064 fn vpmovusqd128(a: u64x2, src: u32x4, mask: u8) -> u32x4;
41065
41066 #[link_name = "llvm.x86.avx512.mask.pmovus.qw.512"]
41067 fn vpmovusqw(a: u64x8, src: u16x8, mask: u8) -> u16x8;
41068 #[link_name = "llvm.x86.avx512.mask.pmovus.qw.256"]
41069 fn vpmovusqw256(a: u64x4, src: u16x8, mask: u8) -> u16x8;
41070 #[link_name = "llvm.x86.avx512.mask.pmovus.qw.128"]
41071 fn vpmovusqw128(a: u64x2, src: u16x8, mask: u8) -> u16x8;
41072
41073 #[link_name = "llvm.x86.avx512.mask.pmovus.qb.512"]
41074 fn vpmovusqb(a: u64x8, src: u8x16, mask: u8) -> u8x16;
41075 #[link_name = "llvm.x86.avx512.mask.pmovus.qb.256"]
41076 fn vpmovusqb256(a: u64x4, src: u8x16, mask: u8) -> u8x16;
41077 #[link_name = "llvm.x86.avx512.mask.pmovus.qb.128"]
41078 fn vpmovusqb128(a: u64x2, src: u8x16, mask: u8) -> u8x16;
41079
41080 #[link_name = "llvm.x86.avx512.gather.dpd.512"]
41081 fn vgatherdpd(src: f64x8, slice: *const i8, offsets: i32x8, mask: i8, scale: i32) -> f64x8;
41082 #[link_name = "llvm.x86.avx512.gather.dps.512"]
41083 fn vgatherdps(src: f32x16, slice: *const i8, offsets: i32x16, mask: i16, scale: i32) -> f32x16;
41084 #[link_name = "llvm.x86.avx512.gather.qpd.512"]
41085 fn vgatherqpd(src: f64x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> f64x8;
41086 #[link_name = "llvm.x86.avx512.gather.qps.512"]
41087 fn vgatherqps(src: f32x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> f32x8;
41088 #[link_name = "llvm.x86.avx512.gather.dpq.512"]
41089 fn vpgatherdq(src: i64x8, slice: *const i8, offsets: i32x8, mask: i8, scale: i32) -> i64x8;
41090 #[link_name = "llvm.x86.avx512.gather.dpi.512"]
41091 fn vpgatherdd(src: i32x16, slice: *const i8, offsets: i32x16, mask: i16, scale: i32) -> i32x16;
41092 #[link_name = "llvm.x86.avx512.gather.qpq.512"]
41093 fn vpgatherqq(src: i64x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> i64x8;
41094 #[link_name = "llvm.x86.avx512.gather.qpi.512"]
41095 fn vpgatherqd(src: i32x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> i32x8;
41096
41097 #[link_name = "llvm.x86.avx512.scatter.dpd.512"]
41098 fn vscatterdpd(slice: *mut i8, mask: i8, offsets: i32x8, src: f64x8, scale: i32);
41099 #[link_name = "llvm.x86.avx512.scatter.dps.512"]
41100 fn vscatterdps(slice: *mut i8, mask: i16, offsets: i32x16, src: f32x16, scale: i32);
41101 #[link_name = "llvm.x86.avx512.scatter.qpd.512"]
41102 fn vscatterqpd(slice: *mut i8, mask: i8, offsets: i64x8, src: f64x8, scale: i32);
41103 #[link_name = "llvm.x86.avx512.scatter.qps.512"]
41104 fn vscatterqps(slice: *mut i8, mask: i8, offsets: i64x8, src: f32x8, scale: i32);
41105 #[link_name = "llvm.x86.avx512.scatter.dpq.512"]
41106 fn vpscatterdq(slice: *mut i8, mask: i8, offsets: i32x8, src: i64x8, scale: i32);
41107 #[link_name = "llvm.x86.avx512.scattersiv4.di"]
41108 fn vpscatterdq256(slice: *mut i8, mask: i8, offsets: i32x4, src: i64x4, scale: i32);
41109
41110 #[link_name = "llvm.x86.avx512.scatter.dpi.512"]
41111 fn vpscatterdd(slice: *mut i8, mask: i16, offsets: i32x16, src: i32x16, scale: i32);
41112 #[link_name = "llvm.x86.avx512.scatter.qpq.512"]
41113 fn vpscatterqq(slice: *mut i8, mask: i8, offsets: i64x8, src: i64x8, scale: i32);
41114 #[link_name = "llvm.x86.avx512.scatter.qpi.512"]
41115 fn vpscatterqd(slice: *mut i8, mask: i8, offsets: i64x8, src: i32x8, scale: i32);
41116
41117 #[link_name = "llvm.x86.avx512.mask.cmp.ss"]
41118 fn vcmpss(a: __m128, b: __m128, op: i32, m: i8, sae: i32) -> i8;
41119 #[link_name = "llvm.x86.avx512.mask.cmp.sd"]
41120 fn vcmpsd(a: __m128d, b: __m128d, op: i32, m: i8, sae: i32) -> i8;
41121
41122 #[link_name = "llvm.x86.avx512.mask.cmp.ps.512"]
41123 fn vcmpps(a: f32x16, b: f32x16, op: i32, m: i16, sae: i32) -> i16;
41124 #[link_name = "llvm.x86.avx512.mask.cmp.ps.256"]
41125 fn vcmpps256(a: f32x8, b: f32x8, op: i32, m: i8) -> i8;
41126 #[link_name = "llvm.x86.avx512.mask.cmp.ps.128"]
41127 fn vcmpps128(a: f32x4, b: f32x4, op: i32, m: i8) -> i8;
41128
41129 #[link_name = "llvm.x86.avx512.mask.cmp.pd.512"]
41130 fn vcmppd(a: f64x8, b: f64x8, op: i32, m: i8, sae: i32) -> i8;
41131 #[link_name = "llvm.x86.avx512.mask.cmp.pd.256"]
41132 fn vcmppd256(a: f64x4, b: f64x4, op: i32, m: i8) -> i8;
41133 #[link_name = "llvm.x86.avx512.mask.cmp.pd.128"]
41134 fn vcmppd128(a: f64x2, b: f64x2, op: i32, m: i8) -> i8;
41135
41136 #[link_name = "llvm.x86.avx512.mask.ucmp.q.512"]
41137 fn vpcmpuq(a: i64x8, b: i64x8, op: i32, m: i8) -> i8;
41138 #[link_name = "llvm.x86.avx512.mask.ucmp.q.256"]
41139 fn vpcmpuq256(a: i64x4, b: i64x4, op: i32, m: i8) -> i8;
41140 #[link_name = "llvm.x86.avx512.mask.ucmp.q.128"]
41141 fn vpcmpuq128(a: i64x2, b: i64x2, op: i32, m: i8) -> i8;
41142
41143 #[link_name = "llvm.x86.avx512.mask.cmp.q.512"]
41144 fn vpcmpq(a: i64x8, b: i64x8, op: i32, m: i8) -> i8;
41145 #[link_name = "llvm.x86.avx512.mask.cmp.q.256"]
41146 fn vpcmpq256(a: i64x4, b: i64x4, op: i32, m: i8) -> i8;
41147 #[link_name = "llvm.x86.avx512.mask.cmp.q.128"]
41148 fn vpcmpq128(a: i64x2, b: i64x2, op: i32, m: i8) -> i8;
41149
41150 #[link_name = "llvm.x86.avx512.mask.ucmp.d.512"]
41151 fn vpcmpud(a: i32x16, b: i32x16, op: i32, m: i16) -> i16;
41152 #[link_name = "llvm.x86.avx512.mask.ucmp.d.256"]
41153 fn vpcmpud256(a: i32x8, b: i32x8, op: i32, m: i8) -> i8;
41154 #[link_name = "llvm.x86.avx512.mask.ucmp.d.128"]
41155 fn vpcmpud128(a: i32x4, b: i32x4, op: i32, m: i8) -> i8;
41156
41157 #[link_name = "llvm.x86.avx512.mask.cmp.d.512"]
41158 fn vpcmpd(a: i32x16, b: i32x16, op: i32, m: i16) -> i16;
41159 #[link_name = "llvm.x86.avx512.mask.cmp.d.256"]
41160 fn vpcmpd256(a: i32x8, b: i32x8, op: i32, m: i8) -> i8;
41161 #[link_name = "llvm.x86.avx512.mask.cmp.d.128"]
41162 fn vpcmpd128(a: i32x4, b: i32x4, op: i32, m: i8) -> i8;
41163
41164 #[link_name = "llvm.x86.avx512.mask.prol.d.512"]
41165 fn vprold(a: i32x16, i8: i32) -> i32x16;
41166 #[link_name = "llvm.x86.avx512.mask.prol.d.256"]
41167 fn vprold256(a: i32x8, i8: i32) -> i32x8;
41168 #[link_name = "llvm.x86.avx512.mask.prol.d.128"]
41169 fn vprold128(a: i32x4, i8: i32) -> i32x4;
41170
41171 #[link_name = "llvm.x86.avx512.mask.pror.d.512"]
41172 fn vprord(a: i32x16, i8: i32) -> i32x16;
41173 #[link_name = "llvm.x86.avx512.mask.pror.d.256"]
41174 fn vprord256(a: i32x8, i8: i32) -> i32x8;
41175 #[link_name = "llvm.x86.avx512.mask.pror.d.128"]
41176 fn vprord128(a: i32x4, i8: i32) -> i32x4;
41177
41178 #[link_name = "llvm.x86.avx512.mask.prol.q.512"]
41179 fn vprolq(a: i64x8, i8: i32) -> i64x8;
41180 #[link_name = "llvm.x86.avx512.mask.prol.q.256"]
41181 fn vprolq256(a: i64x4, i8: i32) -> i64x4;
41182 #[link_name = "llvm.x86.avx512.mask.prol.q.128"]
41183 fn vprolq128(a: i64x2, i8: i32) -> i64x2;
41184
41185 #[link_name = "llvm.x86.avx512.mask.pror.q.512"]
41186 fn vprorq(a: i64x8, i8: i32) -> i64x8;
41187 #[link_name = "llvm.x86.avx512.mask.pror.q.256"]
41188 fn vprorq256(a: i64x4, i8: i32) -> i64x4;
41189 #[link_name = "llvm.x86.avx512.mask.pror.q.128"]
41190 fn vprorq128(a: i64x2, i8: i32) -> i64x2;
41191
41192 #[link_name = "llvm.x86.avx512.mask.prolv.d.512"]
41193 fn vprolvd(a: i32x16, b: i32x16) -> i32x16;
41194 #[link_name = "llvm.x86.avx512.mask.prolv.d.256"]
41195 fn vprolvd256(a: i32x8, b: i32x8) -> i32x8;
41196 #[link_name = "llvm.x86.avx512.mask.prolv.d.128"]
41197 fn vprolvd128(a: i32x4, b: i32x4) -> i32x4;
41198
41199 #[link_name = "llvm.x86.avx512.mask.prorv.d.512"]
41200 fn vprorvd(a: i32x16, b: i32x16) -> i32x16;
41201 #[link_name = "llvm.x86.avx512.mask.prorv.d.256"]
41202 fn vprorvd256(a: i32x8, b: i32x8) -> i32x8;
41203 #[link_name = "llvm.x86.avx512.mask.prorv.d.128"]
41204 fn vprorvd128(a: i32x4, b: i32x4) -> i32x4;
41205
41206 #[link_name = "llvm.x86.avx512.mask.prolv.q.512"]
41207 fn vprolvq(a: i64x8, b: i64x8) -> i64x8;
41208 #[link_name = "llvm.x86.avx512.mask.prolv.q.256"]
41209 fn vprolvq256(a: i64x4, b: i64x4) -> i64x4;
41210 #[link_name = "llvm.x86.avx512.mask.prolv.q.128"]
41211 fn vprolvq128(a: i64x2, b: i64x2) -> i64x2;
41212
41213 #[link_name = "llvm.x86.avx512.mask.prorv.q.512"]
41214 fn vprorvq(a: i64x8, b: i64x8) -> i64x8;
41215 #[link_name = "llvm.x86.avx512.mask.prorv.q.256"]
41216 fn vprorvq256(a: i64x4, b: i64x4) -> i64x4;
41217 #[link_name = "llvm.x86.avx512.mask.prorv.q.128"]
41218 fn vprorvq128(a: i64x2, b: i64x2) -> i64x2;
41219
41220 #[link_name = "llvm.x86.avx512.psllv.d.512"]
41221 fn vpsllvd(a: i32x16, b: i32x16) -> i32x16;
41222 #[link_name = "llvm.x86.avx512.psrlv.d.512"]
41223 fn vpsrlvd(a: i32x16, b: i32x16) -> i32x16;
41224 #[link_name = "llvm.x86.avx512.psllv.q.512"]
41225 fn vpsllvq(a: i64x8, b: i64x8) -> i64x8;
41226 #[link_name = "llvm.x86.avx512.psrlv.q.512"]
41227 fn vpsrlvq(a: i64x8, b: i64x8) -> i64x8;
41228
41229 #[link_name = "llvm.x86.avx512.psll.d.512"]
41230 fn vpslld(a: i32x16, count: i32x4) -> i32x16;
41231 #[link_name = "llvm.x86.avx512.psrl.d.512"]
41232 fn vpsrld(a: i32x16, count: i32x4) -> i32x16;
41233 #[link_name = "llvm.x86.avx512.psll.q.512"]
41234 fn vpsllq(a: i64x8, count: i64x2) -> i64x8;
41235 #[link_name = "llvm.x86.avx512.psrl.q.512"]
41236 fn vpsrlq(a: i64x8, count: i64x2) -> i64x8;
41237
41238 #[link_name = "llvm.x86.avx512.psra.d.512"]
41239 fn vpsrad(a: i32x16, count: i32x4) -> i32x16;
41240
41241 #[link_name = "llvm.x86.avx512.psra.q.512"]
41242 fn vpsraq(a: i64x8, count: i64x2) -> i64x8;
41243 #[link_name = "llvm.x86.avx512.psra.q.256"]
41244 fn vpsraq256(a: i64x4, count: i64x2) -> i64x4;
41245 #[link_name = "llvm.x86.avx512.psra.q.128"]
41246 fn vpsraq128(a: i64x2, count: i64x2) -> i64x2;
41247
41248 #[link_name = "llvm.x86.avx512.psrav.d.512"]
41249 fn vpsravd(a: i32x16, count: i32x16) -> i32x16;
41250
41251 #[link_name = "llvm.x86.avx512.psrav.q.512"]
41252 fn vpsravq(a: i64x8, count: i64x8) -> i64x8;
41253 #[link_name = "llvm.x86.avx512.psrav.q.256"]
41254 fn vpsravq256(a: i64x4, count: i64x4) -> i64x4;
41255 #[link_name = "llvm.x86.avx512.psrav.q.128"]
41256 fn vpsravq128(a: i64x2, count: i64x2) -> i64x2;
41257
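    // Permutes: VPERMILPS/VPERMILPD shuffle within each 128-bit lane, while VPERMD/VPERMQ/VPERMPS/VPERMPD permute across the whole vector.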
41258 #[link_name = "llvm.x86.avx512.vpermilvar.ps.512"]
41259 fn vpermilps(a: f32x16, b: i32x16) -> f32x16;
41260 #[link_name = "llvm.x86.avx512.vpermilvar.pd.512"]
41261 fn vpermilpd(a: f64x8, b: i64x8) -> f64x8;
41262
41263 #[link_name = "llvm.x86.avx512.permvar.si.512"]
41264 fn vpermd(a: i32x16, idx: i32x16) -> i32x16;
41265
41266 #[link_name = "llvm.x86.avx512.permvar.di.512"]
41267 fn vpermq(a: i64x8, idx: i64x8) -> i64x8;
41268 #[link_name = "llvm.x86.avx512.permvar.di.256"]
41269 fn vpermq256(a: i64x4, idx: i64x4) -> i64x4;
41270
41271 #[link_name = "llvm.x86.avx512.permvar.sf.512"]
41272 fn vpermps(a: f32x16, idx: i32x16) -> f32x16;
41273
41274 #[link_name = "llvm.x86.avx512.permvar.df.512"]
41275 fn vpermpd(a: f64x8, idx: i64x8) -> f64x8;
41276 #[link_name = "llvm.x86.avx512.permvar.df.256"]
41277 fn vpermpd256(a: f64x4, idx: i64x4) -> f64x4;
41278
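    // Two-source permutes: each index in `idx` selects an element from the concatenation of `a` and `b`.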
41279 #[link_name = "llvm.x86.avx512.vpermi2var.d.512"]
41280 fn vpermi2d(a: i32x16, idx: i32x16, b: i32x16) -> i32x16;
41281 #[link_name = "llvm.x86.avx512.vpermi2var.d.256"]
41282 fn vpermi2d256(a: i32x8, idx: i32x8, b: i32x8) -> i32x8;
41283 #[link_name = "llvm.x86.avx512.vpermi2var.d.128"]
41284 fn vpermi2d128(a: i32x4, idx: i32x4, b: i32x4) -> i32x4;
41285
41286 #[link_name = "llvm.x86.avx512.vpermi2var.q.512"]
41287 fn vpermi2q(a: i64x8, idx: i64x8, b: i64x8) -> i64x8;
41288 #[link_name = "llvm.x86.avx512.vpermi2var.q.256"]
41289 fn vpermi2q256(a: i64x4, idx: i64x4, b: i64x4) -> i64x4;
41290 #[link_name = "llvm.x86.avx512.vpermi2var.q.128"]
41291 fn vpermi2q128(a: i64x2, idx: i64x2, b: i64x2) -> i64x2;
41292
41293 #[link_name = "llvm.x86.avx512.vpermi2var.ps.512"]
41294 fn vpermi2ps(a: f32x16, idx: i32x16, b: f32x16) -> f32x16;
41295 #[link_name = "llvm.x86.avx512.vpermi2var.ps.256"]
41296 fn vpermi2ps256(a: f32x8, idx: i32x8, b: f32x8) -> f32x8;
41297 #[link_name = "llvm.x86.avx512.vpermi2var.ps.128"]
41298 fn vpermi2ps128(a: f32x4, idx: i32x4, b: f32x4) -> f32x4;
41299
41300 #[link_name = "llvm.x86.avx512.vpermi2var.pd.512"]
41301 fn vpermi2pd(a: f64x8, idx: i64x8, b: f64x8) -> f64x8;
41302 #[link_name = "llvm.x86.avx512.vpermi2var.pd.256"]
41303 fn vpermi2pd256(a: f64x4, idx: i64x4, b: f64x4) -> f64x4;
41304 #[link_name = "llvm.x86.avx512.vpermi2var.pd.128"]
41305 fn vpermi2pd128(a: f64x2, idx: i64x2, b: f64x2) -> f64x2;
41306
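    // Compress: mask-selected elements are packed contiguously into the low lanes; the remaining lanes are taken from `src`.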
41307 #[link_name = "llvm.x86.avx512.mask.compress.d.512"]
41308 fn vpcompressd(a: i32x16, src: i32x16, mask: u16) -> i32x16;
41309 #[link_name = "llvm.x86.avx512.mask.compress.d.256"]
41310 fn vpcompressd256(a: i32x8, src: i32x8, mask: u8) -> i32x8;
41311 #[link_name = "llvm.x86.avx512.mask.compress.d.128"]
41312 fn vpcompressd128(a: i32x4, src: i32x4, mask: u8) -> i32x4;
41313
41314 #[link_name = "llvm.x86.avx512.mask.compress.q.512"]
41315 fn vpcompressq(a: i64x8, src: i64x8, mask: u8) -> i64x8;
41316 #[link_name = "llvm.x86.avx512.mask.compress.q.256"]
41317 fn vpcompressq256(a: i64x4, src: i64x4, mask: u8) -> i64x4;
41318 #[link_name = "llvm.x86.avx512.mask.compress.q.128"]
41319 fn vpcompressq128(a: i64x2, src: i64x2, mask: u8) -> i64x2;
41320
41321 #[link_name = "llvm.x86.avx512.mask.compress.ps.512"]
41322 fn vcompressps(a: f32x16, src: f32x16, mask: u16) -> f32x16;
41323 #[link_name = "llvm.x86.avx512.mask.compress.ps.256"]
41324 fn vcompressps256(a: f32x8, src: f32x8, mask: u8) -> f32x8;
41325 #[link_name = "llvm.x86.avx512.mask.compress.ps.128"]
41326 fn vcompressps128(a: f32x4, src: f32x4, mask: u8) -> f32x4;
41327
41328 #[link_name = "llvm.x86.avx512.mask.compress.pd.512"]
41329 fn vcompresspd(a: f64x8, src: f64x8, mask: u8) -> f64x8;
41330 #[link_name = "llvm.x86.avx512.mask.compress.pd.256"]
41331 fn vcompresspd256(a: f64x4, src: f64x4, mask: u8) -> f64x4;
41332 #[link_name = "llvm.x86.avx512.mask.compress.pd.128"]
41333 fn vcompresspd128(a: f64x2, src: f64x2, mask: u8) -> f64x2;
41334
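    // Compress-store: mask-selected elements are written contiguously to memory starting at `mem`.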
41335 #[link_name = "llvm.x86.avx512.mask.compress.store.d.512"]
41336 fn vcompressstored(mem: *mut i8, data: i32x16, mask: u16);
41337 #[link_name = "llvm.x86.avx512.mask.compress.store.d.256"]
41338 fn vcompressstored256(mem: *mut i8, data: i32x8, mask: u8);
41339 #[link_name = "llvm.x86.avx512.mask.compress.store.d.128"]
41340 fn vcompressstored128(mem: *mut i8, data: i32x4, mask: u8);
41341
41342 #[link_name = "llvm.x86.avx512.mask.compress.store.q.512"]
41343 fn vcompressstoreq(mem: *mut i8, data: i64x8, mask: u8);
41344 #[link_name = "llvm.x86.avx512.mask.compress.store.q.256"]
41345 fn vcompressstoreq256(mem: *mut i8, data: i64x4, mask: u8);
41346 #[link_name = "llvm.x86.avx512.mask.compress.store.q.128"]
41347 fn vcompressstoreq128(mem: *mut i8, data: i64x2, mask: u8);
41348
41349 #[link_name = "llvm.x86.avx512.mask.compress.store.ps.512"]
41350 fn vcompressstoreps(mem: *mut i8, data: f32x16, mask: u16);
41351 #[link_name = "llvm.x86.avx512.mask.compress.store.ps.256"]
41352 fn vcompressstoreps256(mem: *mut i8, data: f32x8, mask: u8);
41353 #[link_name = "llvm.x86.avx512.mask.compress.store.ps.128"]
41354 fn vcompressstoreps128(mem: *mut i8, data: f32x4, mask: u8);
41355
41356 #[link_name = "llvm.x86.avx512.mask.compress.store.pd.512"]
41357 fn vcompressstorepd(mem: *mut i8, data: f64x8, mask: u8);
41358 #[link_name = "llvm.x86.avx512.mask.compress.store.pd.256"]
41359 fn vcompressstorepd256(mem: *mut i8, data: f64x4, mask: u8);
41360 #[link_name = "llvm.x86.avx512.mask.compress.store.pd.128"]
41361 fn vcompressstorepd128(mem: *mut i8, data: f64x2, mask: u8);
41362
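    // Expand: contiguous low elements of `a` are placed into the lanes selected by the mask; unselected lanes are taken from `src`.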
41363 #[link_name = "llvm.x86.avx512.mask.expand.d.512"]
41364 fn vpexpandd(a: i32x16, src: i32x16, mask: u16) -> i32x16;
41365 #[link_name = "llvm.x86.avx512.mask.expand.d.256"]
41366 fn vpexpandd256(a: i32x8, src: i32x8, mask: u8) -> i32x8;
41367 #[link_name = "llvm.x86.avx512.mask.expand.d.128"]
41368 fn vpexpandd128(a: i32x4, src: i32x4, mask: u8) -> i32x4;
41369
41370 #[link_name = "llvm.x86.avx512.mask.expand.q.512"]
41371 fn vpexpandq(a: i64x8, src: i64x8, mask: u8) -> i64x8;
41372 #[link_name = "llvm.x86.avx512.mask.expand.q.256"]
41373 fn vpexpandq256(a: i64x4, src: i64x4, mask: u8) -> i64x4;
41374 #[link_name = "llvm.x86.avx512.mask.expand.q.128"]
41375 fn vpexpandq128(a: i64x2, src: i64x2, mask: u8) -> i64x2;
41376
41377 #[link_name = "llvm.x86.avx512.mask.expand.ps.512"]
41378 fn vexpandps(a: f32x16, src: f32x16, mask: u16) -> f32x16;
41379 #[link_name = "llvm.x86.avx512.mask.expand.ps.256"]
41380 fn vexpandps256(a: f32x8, src: f32x8, mask: u8) -> f32x8;
41381 #[link_name = "llvm.x86.avx512.mask.expand.ps.128"]
41382 fn vexpandps128(a: f32x4, src: f32x4, mask: u8) -> f32x4;
41383
41384 #[link_name = "llvm.x86.avx512.mask.expand.pd.512"]
41385 fn vexpandpd(a: f64x8, src: f64x8, mask: u8) -> f64x8;
41386 #[link_name = "llvm.x86.avx512.mask.expand.pd.256"]
41387 fn vexpandpd256(a: f64x4, src: f64x4, mask: u8) -> f64x4;
41388 #[link_name = "llvm.x86.avx512.mask.expand.pd.128"]
41389 fn vexpandpd128(a: f64x2, src: f64x2, mask: u8) -> f64x2;
41390
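    // Scalar (lowest-element) arithmetic with explicit rounding or SAE control; the result is merge-masked against `src` and the upper elements pass through from `a`.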
41391 #[link_name = "llvm.x86.avx512.mask.add.ss.round"]
41392 fn vaddss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
41393 #[link_name = "llvm.x86.avx512.mask.add.sd.round"]
41394 fn vaddsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
41395 #[link_name = "llvm.x86.avx512.mask.sub.ss.round"]
41396 fn vsubss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
41397 #[link_name = "llvm.x86.avx512.mask.sub.sd.round"]
41398 fn vsubsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
41399 #[link_name = "llvm.x86.avx512.mask.mul.ss.round"]
41400 fn vmulss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
41401 #[link_name = "llvm.x86.avx512.mask.mul.sd.round"]
41402 fn vmulsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
41403 #[link_name = "llvm.x86.avx512.mask.div.ss.round"]
41404 fn vdivss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
41405 #[link_name = "llvm.x86.avx512.mask.div.sd.round"]
41406 fn vdivsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
41407 #[link_name = "llvm.x86.avx512.mask.max.ss.round"]
41408 fn vmaxss(a: f32x4, b: f32x4, src: f32x4, mask: u8, sae: i32) -> f32x4;
41409 #[link_name = "llvm.x86.avx512.mask.max.sd.round"]
41410 fn vmaxsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, sae: i32) -> f64x2;
41411 #[link_name = "llvm.x86.avx512.mask.min.ss.round"]
41412 fn vminss(a: f32x4, b: f32x4, src: f32x4, mask: u8, sae: i32) -> f32x4;
41413 #[link_name = "llvm.x86.avx512.mask.min.sd.round"]
41414 fn vminsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, sae: i32) -> f64x2;
41415 #[link_name = "llvm.x86.avx512.mask.sqrt.ss"]
41416 fn vsqrtss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
41417 #[link_name = "llvm.x86.avx512.mask.sqrt.sd"]
41418 fn vsqrtsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
41419 #[link_name = "llvm.x86.avx512.mask.getexp.ss"]
41420 fn vgetexpss(a: f32x4, b: f32x4, src: f32x4, mask: u8, sae: i32) -> f32x4;
41421 #[link_name = "llvm.x86.avx512.mask.getexp.sd"]
41422 fn vgetexpsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, sae: i32) -> f64x2;
41423 #[link_name = "llvm.x86.avx512.mask.getmant.ss"]
41424 fn vgetmantss(a: f32x4, b: f32x4, mantissas: i32, src: f32x4, m: u8, sae: i32) -> f32x4;
41425 #[link_name = "llvm.x86.avx512.mask.getmant.sd"]
41426 fn vgetmantsd(a: f64x2, b: f64x2, mantissas: i32, src: f64x2, m: u8, sae: i32) -> f64x2;
41427
41428 #[link_name = "llvm.x86.avx512.rsqrt14.ss"]
41429 fn vrsqrt14ss(a: f32x4, b: f32x4, src: f32x4, mask: u8) -> f32x4;
41430 #[link_name = "llvm.x86.avx512.rsqrt14.sd"]
41431 fn vrsqrt14sd(a: f64x2, b: f64x2, src: f64x2, mask: u8) -> f64x2;
41432 #[link_name = "llvm.x86.avx512.rcp14.ss"]
41433 fn vrcp14ss(a: f32x4, b: f32x4, src: f32x4, mask: u8) -> f32x4;
41434 #[link_name = "llvm.x86.avx512.rcp14.sd"]
41435 fn vrcp14sd(a: f64x2, b: f64x2, src: f64x2, mask: u8) -> f64x2;
41436
41437 #[link_name = "llvm.x86.avx512.mask.rndscale.ss"]
41438 fn vrndscaless(a: f32x4, b: f32x4, src: f32x4, mask: u8, imm8: i32, sae: i32) -> f32x4;
41439 #[link_name = "llvm.x86.avx512.mask.rndscale.sd"]
41440 fn vrndscalesd(a: f64x2, b: f64x2, src: f64x2, mask: u8, imm8: i32, sae: i32) -> f64x2;
41441 #[link_name = "llvm.x86.avx512.mask.scalef.ss"]
41442 fn vscalefss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
41443 #[link_name = "llvm.x86.avx512.mask.scalef.sd"]
41444 fn vscalefsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
41445
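    // Scalar fused multiply-add on the lowest element with rounding control.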
41446 #[link_name = "llvm.x86.avx512.vfmadd.f32"]
41447 fn vfmadd132ss(a: f32, b: f32, c: f32, rounding: i32) -> f32;
41448 #[link_name = "llvm.x86.avx512.vfmadd.f64"]
41449 fn vfmadd132sd(a: f64, b: f64, c: f64, rounding: i32) -> f64;
41450
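    // Scalar fixupimm: special-case inputs (NaN, +/-0, +/-infinity, ...) are replaced according to the per-element lookup table in `c`, with behaviour flags in `imm8`.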
41451 #[link_name = "llvm.x86.avx512.mask.fixupimm.ss"]
41452 fn vfixupimmss(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8, sae: i32) -> f32x4;
41453 #[link_name = "llvm.x86.avx512.mask.fixupimm.sd"]
41454 fn vfixupimmsd(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8, sae: i32) -> f64x2;
41455 #[link_name = "llvm.x86.avx512.maskz.fixupimm.ss"]
41456 fn vfixupimmssz(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8, sae: i32) -> f32x4;
41457 #[link_name = "llvm.x86.avx512.maskz.fixupimm.sd"]
41458 fn vfixupimmsdz(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8, sae: i32) -> f64x2;
41459
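    // Scalar conversions between f32/f64 and signed/unsigned integers with rounding or SAE control.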
41460 #[link_name = "llvm.x86.avx512.mask.cvtss2sd.round"]
    fn vcvtss2sd(a: f64x2, b: f32x4, src: f64x2, mask: u8, sae: i32) -> f64x2;
41462 #[link_name = "llvm.x86.avx512.mask.cvtsd2ss.round"]
41463 fn vcvtsd2ss(a: f32x4, b: f64x2, src: f32x4, mask: u8, rounding: i32) -> f32x4;
41464
41465 #[link_name = "llvm.x86.avx512.vcvtss2si32"]
41466 fn vcvtss2si(a: f32x4, rounding: i32) -> i32;
41467 #[link_name = "llvm.x86.avx512.vcvtss2usi32"]
41468 fn vcvtss2usi(a: f32x4, rounding: i32) -> u32;
41469
41470 #[link_name = "llvm.x86.avx512.vcvtsd2si32"]
41471 fn vcvtsd2si(a: f64x2, rounding: i32) -> i32;
41472 #[link_name = "llvm.x86.avx512.vcvtsd2usi32"]
41473 fn vcvtsd2usi(a: f64x2, rounding: i32) -> u32;
41474
41475 #[link_name = "llvm.x86.avx512.cvtsi2ss32"]
41476 fn vcvtsi2ss(a: f32x4, b: i32, rounding: i32) -> f32x4;
41477 #[link_name = "llvm.x86.avx512.cvtsi2sd64"]
41478 fn vcvtsi2sd(a: f64x2, b: i64, rounding: i32) -> f64x2;
41479
41480 #[link_name = "llvm.x86.avx512.cvtusi2ss"]
41481 fn vcvtusi2ss(a: f32x4, b: u32, rounding: i32) -> f32x4;
41482 #[link_name = "llvm.x86.avx512.cvtusi642sd"]
41483 fn vcvtusi2sd(a: f64x2, b: u64, rounding: i32) -> f64x2;
41484
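    // Scalar compares of the lowest elements: the predicate selected by `imm8` is evaluated and returned as 0 or 1.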
41485 #[link_name = "llvm.x86.avx512.vcomi.ss"]
41486 fn vcomiss(a: f32x4, b: f32x4, imm8: i32, sae: i32) -> i32;
41487 #[link_name = "llvm.x86.avx512.vcomi.sd"]
41488 fn vcomisd(a: f64x2, b: f64x2, imm8: i32, sae: i32) -> i32;
41489}
41490
41491#[cfg(test)]
41492mod tests {
41493
41494 use stdarch_test::simd_test;
41495
41496 use crate::core_arch::x86::*;
41497 use crate::hint::black_box;
    use crate::mem;
41499
41500 #[simd_test(enable = "avx512f")]
41501 unsafe fn test_mm512_abs_epi32() {
41502 #[rustfmt::skip]
41503 let a = _mm512_setr_epi32(
41504 0, 1, -1, i32::MAX,
41505 i32::MIN, 100, -100, -32,
41506 0, 1, -1, i32::MAX,
41507 i32::MIN, 100, -100, -32,
41508 );
41509 let r = _mm512_abs_epi32(a);
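        // abs(i32::MIN) is not representable in i32 and wraps back to i32::MIN, written below as i32::MAX.wrapping_add(1).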
41510 #[rustfmt::skip]
41511 let e = _mm512_setr_epi32(
41512 0, 1, 1, i32::MAX,
41513 i32::MAX.wrapping_add(1), 100, 100, 32,
41514 0, 1, 1, i32::MAX,
41515 i32::MAX.wrapping_add(1), 100, 100, 32,
41516 );
41517 assert_eq_m512i(r, e);
41518 }
41519
41520 #[simd_test(enable = "avx512f")]
41521 unsafe fn test_mm512_mask_abs_epi32() {
41522 #[rustfmt::skip]
41523 let a = _mm512_setr_epi32(
41524 0, 1, -1, i32::MAX,
41525 i32::MIN, 100, -100, -32,
41526 0, 1, -1, i32::MAX,
41527 i32::MIN, 100, -100, -32,
41528 );
41529 let r = _mm512_mask_abs_epi32(a, 0, a);
41530 assert_eq_m512i(r, a);
41531 let r = _mm512_mask_abs_epi32(a, 0b00000000_11111111, a);
41532 #[rustfmt::skip]
41533 let e = _mm512_setr_epi32(
41534 0, 1, 1, i32::MAX,
41535 i32::MAX.wrapping_add(1), 100, 100, 32,
41536 0, 1, -1, i32::MAX,
41537 i32::MIN, 100, -100, -32,
41538 );
41539 assert_eq_m512i(r, e);
41540 }
41541
41542 #[simd_test(enable = "avx512f")]
41543 unsafe fn test_mm512_maskz_abs_epi32() {
41544 #[rustfmt::skip]
41545 let a = _mm512_setr_epi32(
41546 0, 1, -1, i32::MAX,
41547 i32::MIN, 100, -100, -32,
41548 0, 1, -1, i32::MAX,
41549 i32::MIN, 100, -100, -32,
41550 );
41551 let r = _mm512_maskz_abs_epi32(0, a);
41552 assert_eq_m512i(r, _mm512_setzero_si512());
41553 let r = _mm512_maskz_abs_epi32(0b00000000_11111111, a);
41554 #[rustfmt::skip]
41555 let e = _mm512_setr_epi32(
41556 0, 1, 1, i32::MAX,
41557 i32::MAX.wrapping_add(1), 100, 100, 32,
41558 0, 0, 0, 0,
41559 0, 0, 0, 0,
41560 );
41561 assert_eq_m512i(r, e);
41562 }
41563
41564 #[simd_test(enable = "avx512f,avx512vl")]
41565 unsafe fn test_mm256_mask_abs_epi32() {
41566 #[rustfmt::skip]
41567 let a = _mm256_setr_epi32(
41568 0, 1, -1, i32::MAX,
41569 i32::MIN, 100, -100, -32,
41570 );
41571 let r = _mm256_mask_abs_epi32(a, 0, a);
41572 assert_eq_m256i(r, a);
41573 let r = _mm256_mask_abs_epi32(a, 0b00001111, a);
41574 #[rustfmt::skip]
        let e = _mm256_setr_epi32(
            0, 1, 1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
41579 assert_eq_m256i(r, e);
41580 }
41581
41582 #[simd_test(enable = "avx512f,avx512vl")]
41583 unsafe fn test_mm256_maskz_abs_epi32() {
41584 #[rustfmt::skip]
41585 let a = _mm256_setr_epi32(
41586 0, 1, -1, i32::MAX,
41587 i32::MIN, 100, -100, -32,
41588 );
41589 let r = _mm256_maskz_abs_epi32(0, a);
41590 assert_eq_m256i(r, _mm256_setzero_si256());
41591 let r = _mm256_maskz_abs_epi32(0b00001111, a);
41592 #[rustfmt::skip]
41593 let e = _mm256_setr_epi32(
41594 0, 1, 1, i32::MAX,
41595 0, 0, 0, 0,
41596 );
41597 assert_eq_m256i(r, e);
41598 }
41599
41600 #[simd_test(enable = "avx512f,avx512vl")]
41601 unsafe fn test_mm_mask_abs_epi32() {
41602 let a = _mm_setr_epi32(i32::MIN, 100, -100, -32);
41603 let r = _mm_mask_abs_epi32(a, 0, a);
41604 assert_eq_m128i(r, a);
41605 let r = _mm_mask_abs_epi32(a, 0b00001111, a);
41606 let e = _mm_setr_epi32(i32::MAX.wrapping_add(1), 100, 100, 32);
41607 assert_eq_m128i(r, e);
41608 }
41609
41610 #[simd_test(enable = "avx512f,avx512vl")]
41611 unsafe fn test_mm_maskz_abs_epi32() {
41612 let a = _mm_setr_epi32(i32::MIN, 100, -100, -32);
41613 let r = _mm_maskz_abs_epi32(0, a);
41614 assert_eq_m128i(r, _mm_setzero_si128());
41615 let r = _mm_maskz_abs_epi32(0b00001111, a);
41616 let e = _mm_setr_epi32(i32::MAX.wrapping_add(1), 100, 100, 32);
41617 assert_eq_m128i(r, e);
41618 }
41619
41620 #[simd_test(enable = "avx512f")]
41621 unsafe fn test_mm512_abs_ps() {
41622 #[rustfmt::skip]
41623 let a = _mm512_setr_ps(
41624 0., 1., -1., f32::MAX,
41625 f32::MIN, 100., -100., -32.,
41626 0., 1., -1., f32::MAX,
41627 f32::MIN, 100., -100., -32.,
41628 );
41629 let r = _mm512_abs_ps(a);
41630 #[rustfmt::skip]
41631 let e = _mm512_setr_ps(
41632 0., 1., 1., f32::MAX,
41633 f32::MAX, 100., 100., 32.,
41634 0., 1., 1., f32::MAX,
41635 f32::MAX, 100., 100., 32.,
41636 );
41637 assert_eq_m512(r, e);
41638 }
41639
41640 #[simd_test(enable = "avx512f")]
41641 unsafe fn test_mm512_mask_abs_ps() {
41642 #[rustfmt::skip]
41643 let a = _mm512_setr_ps(
41644 0., 1., -1., f32::MAX,
41645 f32::MIN, 100., -100., -32.,
41646 0., 1., -1., f32::MAX,
41647 f32::MIN, 100., -100., -32.,
41648 );
41649 let r = _mm512_mask_abs_ps(a, 0, a);
41650 assert_eq_m512(r, a);
41651 let r = _mm512_mask_abs_ps(a, 0b00000000_11111111, a);
41652 #[rustfmt::skip]
41653 let e = _mm512_setr_ps(
41654 0., 1., 1., f32::MAX,
41655 f32::MAX, 100., 100., 32.,
41656 0., 1., -1., f32::MAX,
41657 f32::MIN, 100., -100., -32.,
41658 );
41659 assert_eq_m512(r, e);
41660 }
41661
41662 #[simd_test(enable = "avx512f")]
41663 unsafe fn test_mm512_mask_mov_epi32() {
41664 let src = _mm512_set1_epi32(1);
41665 let a = _mm512_set1_epi32(2);
41666 let r = _mm512_mask_mov_epi32(src, 0, a);
41667 assert_eq_m512i(r, src);
41668 let r = _mm512_mask_mov_epi32(src, 0b11111111_11111111, a);
41669 assert_eq_m512i(r, a);
41670 }
41671
41672 #[simd_test(enable = "avx512f")]
41673 unsafe fn test_mm512_maskz_mov_epi32() {
41674 let a = _mm512_set1_epi32(2);
41675 let r = _mm512_maskz_mov_epi32(0, a);
41676 assert_eq_m512i(r, _mm512_setzero_si512());
41677 let r = _mm512_maskz_mov_epi32(0b11111111_11111111, a);
41678 assert_eq_m512i(r, a);
41679 }
41680
41681 #[simd_test(enable = "avx512f,avx512vl")]
41682 unsafe fn test_mm256_mask_mov_epi32() {
41683 let src = _mm256_set1_epi32(1);
41684 let a = _mm256_set1_epi32(2);
41685 let r = _mm256_mask_mov_epi32(src, 0, a);
41686 assert_eq_m256i(r, src);
41687 let r = _mm256_mask_mov_epi32(src, 0b11111111, a);
41688 assert_eq_m256i(r, a);
41689 }
41690
41691 #[simd_test(enable = "avx512f,avx512vl")]
41692 unsafe fn test_mm256_maskz_mov_epi32() {
41693 let a = _mm256_set1_epi32(2);
41694 let r = _mm256_maskz_mov_epi32(0, a);
41695 assert_eq_m256i(r, _mm256_setzero_si256());
41696 let r = _mm256_maskz_mov_epi32(0b11111111, a);
41697 assert_eq_m256i(r, a);
41698 }
41699
41700 #[simd_test(enable = "avx512f,avx512vl")]
41701 unsafe fn test_mm_mask_mov_epi32() {
41702 let src = _mm_set1_epi32(1);
41703 let a = _mm_set1_epi32(2);
41704 let r = _mm_mask_mov_epi32(src, 0, a);
41705 assert_eq_m128i(r, src);
41706 let r = _mm_mask_mov_epi32(src, 0b00001111, a);
41707 assert_eq_m128i(r, a);
41708 }
41709
41710 #[simd_test(enable = "avx512f,avx512vl")]
41711 unsafe fn test_mm_maskz_mov_epi32() {
41712 let a = _mm_set1_epi32(2);
41713 let r = _mm_maskz_mov_epi32(0, a);
41714 assert_eq_m128i(r, _mm_setzero_si128());
41715 let r = _mm_maskz_mov_epi32(0b00001111, a);
41716 assert_eq_m128i(r, a);
41717 }
41718
41719 #[simd_test(enable = "avx512f")]
41720 unsafe fn test_mm512_mask_mov_ps() {
41721 let src = _mm512_set1_ps(1.);
41722 let a = _mm512_set1_ps(2.);
41723 let r = _mm512_mask_mov_ps(src, 0, a);
41724 assert_eq_m512(r, src);
41725 let r = _mm512_mask_mov_ps(src, 0b11111111_11111111, a);
41726 assert_eq_m512(r, a);
41727 }
41728
41729 #[simd_test(enable = "avx512f")]
41730 unsafe fn test_mm512_maskz_mov_ps() {
41731 let a = _mm512_set1_ps(2.);
41732 let r = _mm512_maskz_mov_ps(0, a);
41733 assert_eq_m512(r, _mm512_setzero_ps());
41734 let r = _mm512_maskz_mov_ps(0b11111111_11111111, a);
41735 assert_eq_m512(r, a);
41736 }
41737
41738 #[simd_test(enable = "avx512f,avx512vl")]
41739 unsafe fn test_mm256_mask_mov_ps() {
41740 let src = _mm256_set1_ps(1.);
41741 let a = _mm256_set1_ps(2.);
41742 let r = _mm256_mask_mov_ps(src, 0, a);
41743 assert_eq_m256(r, src);
41744 let r = _mm256_mask_mov_ps(src, 0b11111111, a);
41745 assert_eq_m256(r, a);
41746 }
41747
41748 #[simd_test(enable = "avx512f,avx512vl")]
41749 unsafe fn test_mm256_maskz_mov_ps() {
41750 let a = _mm256_set1_ps(2.);
41751 let r = _mm256_maskz_mov_ps(0, a);
41752 assert_eq_m256(r, _mm256_setzero_ps());
41753 let r = _mm256_maskz_mov_ps(0b11111111, a);
41754 assert_eq_m256(r, a);
41755 }
41756
41757 #[simd_test(enable = "avx512f,avx512vl")]
41758 unsafe fn test_mm_mask_mov_ps() {
41759 let src = _mm_set1_ps(1.);
41760 let a = _mm_set1_ps(2.);
41761 let r = _mm_mask_mov_ps(src, 0, a);
41762 assert_eq_m128(r, src);
41763 let r = _mm_mask_mov_ps(src, 0b00001111, a);
41764 assert_eq_m128(r, a);
41765 }
41766
41767 #[simd_test(enable = "avx512f,avx512vl")]
41768 unsafe fn test_mm_maskz_mov_ps() {
41769 let a = _mm_set1_ps(2.);
41770 let r = _mm_maskz_mov_ps(0, a);
41771 assert_eq_m128(r, _mm_setzero_ps());
41772 let r = _mm_maskz_mov_ps(0b00001111, a);
41773 assert_eq_m128(r, a);
41774 }
41775
41776 #[simd_test(enable = "avx512f")]
41777 unsafe fn test_mm512_add_epi32() {
41778 #[rustfmt::skip]
41779 let a = _mm512_setr_epi32(
41780 0, 1, -1, i32::MAX,
41781 i32::MIN, 100, -100, -32,
41782 0, 1, -1, i32::MAX,
41783 i32::MIN, 100, -100, -32,
41784 );
41785 let b = _mm512_set1_epi32(1);
41786 let r = _mm512_add_epi32(a, b);
41787 #[rustfmt::skip]
41788 let e = _mm512_setr_epi32(
41789 1, 2, 0, i32::MIN,
41790 i32::MIN + 1, 101, -99, -31,
41791 1, 2, 0, i32::MIN,
41792 i32::MIN + 1, 101, -99, -31,
41793 );
41794 assert_eq_m512i(r, e);
41795 }
41796
41797 #[simd_test(enable = "avx512f")]
41798 unsafe fn test_mm512_mask_add_epi32() {
41799 #[rustfmt::skip]
41800 let a = _mm512_setr_epi32(
41801 0, 1, -1, i32::MAX,
41802 i32::MIN, 100, -100, -32,
41803 0, 1, -1, i32::MAX,
41804 i32::MIN, 100, -100, -32,
41805 );
41806 let b = _mm512_set1_epi32(1);
41807 let r = _mm512_mask_add_epi32(a, 0, a, b);
41808 assert_eq_m512i(r, a);
41809 let r = _mm512_mask_add_epi32(a, 0b00000000_11111111, a, b);
41810 #[rustfmt::skip]
41811 let e = _mm512_setr_epi32(
41812 1, 2, 0, i32::MIN,
41813 i32::MIN + 1, 101, -99, -31,
41814 0, 1, -1, i32::MAX,
41815 i32::MIN, 100, -100, -32,
41816 );
41817 assert_eq_m512i(r, e);
41818 }
41819
41820 #[simd_test(enable = "avx512f")]
41821 unsafe fn test_mm512_maskz_add_epi32() {
41822 #[rustfmt::skip]
41823 let a = _mm512_setr_epi32(
41824 0, 1, -1, i32::MAX,
41825 i32::MIN, 100, -100, -32,
41826 0, 1, -1, i32::MAX,
41827 i32::MIN, 100, -100, -32,
41828 );
41829 let b = _mm512_set1_epi32(1);
41830 let r = _mm512_maskz_add_epi32(0, a, b);
41831 assert_eq_m512i(r, _mm512_setzero_si512());
41832 let r = _mm512_maskz_add_epi32(0b00000000_11111111, a, b);
41833 #[rustfmt::skip]
41834 let e = _mm512_setr_epi32(
41835 1, 2, 0, i32::MIN,
41836 i32::MIN + 1, 101, -99, -31,
41837 0, 0, 0, 0,
41838 0, 0, 0, 0,
41839 );
41840 assert_eq_m512i(r, e);
41841 }
41842
41843 #[simd_test(enable = "avx512f,avx512vl")]
41844 unsafe fn test_mm256_mask_add_epi32() {
41845 let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
41846 let b = _mm256_set1_epi32(1);
41847 let r = _mm256_mask_add_epi32(a, 0, a, b);
41848 assert_eq_m256i(r, a);
41849 let r = _mm256_mask_add_epi32(a, 0b11111111, a, b);
41850 let e = _mm256_set_epi32(1, 2, 0, i32::MIN, i32::MIN + 1, 101, -99, -31);
41851 assert_eq_m256i(r, e);
41852 }
41853
41854 #[simd_test(enable = "avx512f,avx512vl")]
41855 unsafe fn test_mm256_maskz_add_epi32() {
41856 let a = _mm256_setr_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
41857 let b = _mm256_set1_epi32(1);
41858 let r = _mm256_maskz_add_epi32(0, a, b);
41859 assert_eq_m256i(r, _mm256_setzero_si256());
41860 let r = _mm256_maskz_add_epi32(0b11111111, a, b);
41861 let e = _mm256_setr_epi32(1, 2, 0, i32::MIN, i32::MIN + 1, 101, -99, -31);
41862 assert_eq_m256i(r, e);
41863 }
41864
41865 #[simd_test(enable = "avx512f,avx512vl")]
41866 unsafe fn test_mm_mask_add_epi32() {
41867 let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
41868 let b = _mm_set1_epi32(1);
41869 let r = _mm_mask_add_epi32(a, 0, a, b);
41870 assert_eq_m128i(r, a);
41871 let r = _mm_mask_add_epi32(a, 0b00001111, a, b);
41872 let e = _mm_set_epi32(2, 0, i32::MIN, i32::MIN + 1);
41873 assert_eq_m128i(r, e);
41874 }
41875
41876 #[simd_test(enable = "avx512f,avx512vl")]
41877 unsafe fn test_mm_maskz_add_epi32() {
41878 let a = _mm_setr_epi32(1, -1, i32::MAX, i32::MIN);
41879 let b = _mm_set1_epi32(1);
41880 let r = _mm_maskz_add_epi32(0, a, b);
41881 assert_eq_m128i(r, _mm_setzero_si128());
41882 let r = _mm_maskz_add_epi32(0b00001111, a, b);
41883 let e = _mm_setr_epi32(2, 0, i32::MIN, i32::MIN + 1);
41884 assert_eq_m128i(r, e);
41885 }
41886
41887 #[simd_test(enable = "avx512f")]
41888 unsafe fn test_mm512_add_ps() {
41889 #[rustfmt::skip]
41890 let a = _mm512_setr_ps(
41891 0., 1., -1., f32::MAX,
41892 f32::MIN, 100., -100., -32.,
41893 0., 1., -1., f32::MAX,
41894 f32::MIN, 100., -100., -32.,
41895 );
41896 let b = _mm512_set1_ps(1.);
41897 let r = _mm512_add_ps(a, b);
41898 #[rustfmt::skip]
41899 let e = _mm512_setr_ps(
41900 1., 2., 0., f32::MAX,
41901 f32::MIN + 1., 101., -99., -31.,
41902 1., 2., 0., f32::MAX,
41903 f32::MIN + 1., 101., -99., -31.,
41904 );
41905 assert_eq_m512(r, e);
41906 }
41907
41908 #[simd_test(enable = "avx512f")]
41909 unsafe fn test_mm512_mask_add_ps() {
41910 #[rustfmt::skip]
41911 let a = _mm512_setr_ps(
41912 0., 1., -1., f32::MAX,
41913 f32::MIN, 100., -100., -32.,
41914 0., 1., -1., f32::MAX,
41915 f32::MIN, 100., -100., -32.,
41916 );
41917 let b = _mm512_set1_ps(1.);
41918 let r = _mm512_mask_add_ps(a, 0, a, b);
41919 assert_eq_m512(r, a);
41920 let r = _mm512_mask_add_ps(a, 0b00000000_11111111, a, b);
41921 #[rustfmt::skip]
41922 let e = _mm512_setr_ps(
41923 1., 2., 0., f32::MAX,
41924 f32::MIN + 1., 101., -99., -31.,
41925 0., 1., -1., f32::MAX,
41926 f32::MIN, 100., -100., -32.,
41927 );
41928 assert_eq_m512(r, e);
41929 }
41930
41931 #[simd_test(enable = "avx512f")]
41932 unsafe fn test_mm512_maskz_add_ps() {
41933 #[rustfmt::skip]
41934 let a = _mm512_setr_ps(
41935 0., 1., -1., f32::MAX,
41936 f32::MIN, 100., -100., -32.,
41937 0., 1., -1., f32::MAX,
41938 f32::MIN, 100., -100., -32.,
41939 );
41940 let b = _mm512_set1_ps(1.);
41941 let r = _mm512_maskz_add_ps(0, a, b);
41942 assert_eq_m512(r, _mm512_setzero_ps());
41943 let r = _mm512_maskz_add_ps(0b00000000_11111111, a, b);
41944 #[rustfmt::skip]
41945 let e = _mm512_setr_ps(
41946 1., 2., 0., f32::MAX,
41947 f32::MIN + 1., 101., -99., -31.,
41948 0., 0., 0., 0.,
41949 0., 0., 0., 0.,
41950 );
41951 assert_eq_m512(r, e);
41952 }
41953
41954 #[simd_test(enable = "avx512f,avx512vl")]
41955 unsafe fn test_mm256_mask_add_ps() {
41956 let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
41957 let b = _mm256_set1_ps(1.);
41958 let r = _mm256_mask_add_ps(a, 0, a, b);
41959 assert_eq_m256(r, a);
41960 let r = _mm256_mask_add_ps(a, 0b11111111, a, b);
41961 let e = _mm256_set_ps(1., 2., 0., f32::MAX, f32::MIN + 1., 101., -99., -31.);
41962 assert_eq_m256(r, e);
41963 }
41964
41965 #[simd_test(enable = "avx512f,avx512vl")]
41966 unsafe fn test_mm256_maskz_add_ps() {
41967 let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
41968 let b = _mm256_set1_ps(1.);
41969 let r = _mm256_maskz_add_ps(0, a, b);
41970 assert_eq_m256(r, _mm256_setzero_ps());
41971 let r = _mm256_maskz_add_ps(0b11111111, a, b);
41972 let e = _mm256_set_ps(1., 2., 0., f32::MAX, f32::MIN + 1., 101., -99., -31.);
41973 assert_eq_m256(r, e);
41974 }
41975
41976 #[simd_test(enable = "avx512f,avx512vl")]
41977 unsafe fn test_mm_mask_add_ps() {
41978 let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
41979 let b = _mm_set1_ps(1.);
41980 let r = _mm_mask_add_ps(a, 0, a, b);
41981 assert_eq_m128(r, a);
41982 let r = _mm_mask_add_ps(a, 0b00001111, a, b);
41983 let e = _mm_set_ps(2., 0., f32::MAX, f32::MIN + 1.);
41984 assert_eq_m128(r, e);
41985 }
41986
41987 #[simd_test(enable = "avx512f,avx512vl")]
41988 unsafe fn test_mm_maskz_add_ps() {
41989 let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
41990 let b = _mm_set1_ps(1.);
41991 let r = _mm_maskz_add_ps(0, a, b);
41992 assert_eq_m128(r, _mm_setzero_ps());
41993 let r = _mm_maskz_add_ps(0b00001111, a, b);
41994 let e = _mm_set_ps(2., 0., f32::MAX, f32::MIN + 1.);
41995 assert_eq_m128(r, e);
41996 }
41997
41998 #[simd_test(enable = "avx512f")]
41999 unsafe fn test_mm512_sub_epi32() {
42000 #[rustfmt::skip]
42001 let a = _mm512_setr_epi32(
42002 0, 1, -1, i32::MAX,
42003 i32::MIN, 100, -100, -32,
42004 0, 1, -1, i32::MAX,
42005 i32::MIN, 100, -100, -32,
42006 );
42007 let b = _mm512_set1_epi32(1);
42008 let r = _mm512_sub_epi32(a, b);
42009 #[rustfmt::skip]
42010 let e = _mm512_setr_epi32(
42011 -1, 0, -2, i32::MAX - 1,
42012 i32::MAX, 99, -101, -33,
42013 -1, 0, -2, i32::MAX - 1,
42014 i32::MAX, 99, -101, -33,
42015 );
42016 assert_eq_m512i(r, e);
42017 }
42018
42019 #[simd_test(enable = "avx512f")]
42020 unsafe fn test_mm512_mask_sub_epi32() {
42021 #[rustfmt::skip]
42022 let a = _mm512_setr_epi32(
42023 0, 1, -1, i32::MAX,
42024 i32::MIN, 100, -100, -32,
42025 0, 1, -1, i32::MAX,
42026 i32::MIN, 100, -100, -32,
42027 );
42028 let b = _mm512_set1_epi32(1);
42029 let r = _mm512_mask_sub_epi32(a, 0, a, b);
42030 assert_eq_m512i(r, a);
42031 let r = _mm512_mask_sub_epi32(a, 0b00000000_11111111, a, b);
42032 #[rustfmt::skip]
42033 let e = _mm512_setr_epi32(
42034 -1, 0, -2, i32::MAX - 1,
42035 i32::MAX, 99, -101, -33,
42036 0, 1, -1, i32::MAX,
42037 i32::MIN, 100, -100, -32,
42038 );
42039 assert_eq_m512i(r, e);
42040 }
42041
42042 #[simd_test(enable = "avx512f")]
42043 unsafe fn test_mm512_maskz_sub_epi32() {
42044 #[rustfmt::skip]
42045 let a = _mm512_setr_epi32(
42046 0, 1, -1, i32::MAX,
42047 i32::MIN, 100, -100, -32,
42048 0, 1, -1, i32::MAX,
42049 i32::MIN, 100, -100, -32,
42050 );
42051 let b = _mm512_set1_epi32(1);
42052 let r = _mm512_maskz_sub_epi32(0, a, b);
42053 assert_eq_m512i(r, _mm512_setzero_si512());
42054 let r = _mm512_maskz_sub_epi32(0b00000000_11111111, a, b);
42055 #[rustfmt::skip]
42056 let e = _mm512_setr_epi32(
42057 -1, 0, -2, i32::MAX - 1,
42058 i32::MAX, 99, -101, -33,
42059 0, 0, 0, 0,
42060 0, 0, 0, 0,
42061 );
42062 assert_eq_m512i(r, e);
42063 }
42064
42065 #[simd_test(enable = "avx512f,avx512vl")]
42066 unsafe fn test_mm256_mask_sub_epi32() {
42067 let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
42068 let b = _mm256_set1_epi32(1);
42069 let r = _mm256_mask_sub_epi32(a, 0, a, b);
42070 assert_eq_m256i(r, a);
42071 let r = _mm256_mask_sub_epi32(a, 0b11111111, a, b);
42072 let e = _mm256_set_epi32(-1, 0, -2, i32::MAX - 1, i32::MAX, 99, -101, -33);
42073 assert_eq_m256i(r, e);
42074 }
42075
42076 #[simd_test(enable = "avx512f,avx512vl")]
42077 unsafe fn test_mm256_maskz_sub_epi32() {
42078 let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
42079 let b = _mm256_set1_epi32(1);
42080 let r = _mm256_maskz_sub_epi32(0, a, b);
42081 assert_eq_m256i(r, _mm256_setzero_si256());
42082 let r = _mm256_maskz_sub_epi32(0b11111111, a, b);
42083 let e = _mm256_set_epi32(-1, 0, -2, i32::MAX - 1, i32::MAX, 99, -101, -33);
42084 assert_eq_m256i(r, e);
42085 }
42086
42087 #[simd_test(enable = "avx512f,avx512vl")]
42088 unsafe fn test_mm_mask_sub_epi32() {
42089 let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
42090 let b = _mm_set1_epi32(1);
42091 let r = _mm_mask_sub_epi32(a, 0, a, b);
42092 assert_eq_m128i(r, a);
42093 let r = _mm_mask_sub_epi32(a, 0b00001111, a, b);
42094 let e = _mm_set_epi32(0, -2, i32::MAX - 1, i32::MAX);
42095 assert_eq_m128i(r, e);
42096 }
42097
42098 #[simd_test(enable = "avx512f,avx512vl")]
42099 unsafe fn test_mm_maskz_sub_epi32() {
42100 let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
42101 let b = _mm_set1_epi32(1);
42102 let r = _mm_maskz_sub_epi32(0, a, b);
42103 assert_eq_m128i(r, _mm_setzero_si128());
42104 let r = _mm_maskz_sub_epi32(0b00001111, a, b);
42105 let e = _mm_set_epi32(0, -2, i32::MAX - 1, i32::MAX);
42106 assert_eq_m128i(r, e);
42107 }
42108
42109 #[simd_test(enable = "avx512f")]
42110 unsafe fn test_mm512_sub_ps() {
42111 #[rustfmt::skip]
42112 let a = _mm512_setr_ps(
42113 0., 1., -1., f32::MAX,
42114 f32::MIN, 100., -100., -32.,
42115 0., 1., -1., f32::MAX,
42116 f32::MIN, 100., -100., -32.,
42117 );
42118 let b = _mm512_set1_ps(1.);
42119 let r = _mm512_sub_ps(a, b);
42120 #[rustfmt::skip]
42121 let e = _mm512_setr_ps(
42122 -1., 0., -2., f32::MAX - 1.,
42123 f32::MIN, 99., -101., -33.,
42124 -1., 0., -2., f32::MAX - 1.,
42125 f32::MIN, 99., -101., -33.,
42126 );
42127 assert_eq_m512(r, e);
42128 }
42129
42130 #[simd_test(enable = "avx512f")]
42131 unsafe fn test_mm512_mask_sub_ps() {
42132 #[rustfmt::skip]
42133 let a = _mm512_setr_ps(
42134 0., 1., -1., f32::MAX,
42135 f32::MIN, 100., -100., -32.,
42136 0., 1., -1., f32::MAX,
42137 f32::MIN, 100., -100., -32.,
42138 );
42139 let b = _mm512_set1_ps(1.);
42140 let r = _mm512_mask_sub_ps(a, 0, a, b);
42141 assert_eq_m512(r, a);
42142 let r = _mm512_mask_sub_ps(a, 0b00000000_11111111, a, b);
42143 #[rustfmt::skip]
42144 let e = _mm512_setr_ps(
42145 -1., 0., -2., f32::MAX - 1.,
42146 f32::MIN, 99., -101., -33.,
42147 0., 1., -1., f32::MAX,
42148 f32::MIN, 100., -100., -32.,
42149 );
42150 assert_eq_m512(r, e);
42151 }
42152
42153 #[simd_test(enable = "avx512f")]
42154 unsafe fn test_mm512_maskz_sub_ps() {
42155 #[rustfmt::skip]
42156 let a = _mm512_setr_ps(
42157 0., 1., -1., f32::MAX,
42158 f32::MIN, 100., -100., -32.,
42159 0., 1., -1., f32::MAX,
42160 f32::MIN, 100., -100., -32.,
42161 );
42162 let b = _mm512_set1_ps(1.);
42163 let r = _mm512_maskz_sub_ps(0, a, b);
42164 assert_eq_m512(r, _mm512_setzero_ps());
42165 let r = _mm512_maskz_sub_ps(0b00000000_11111111, a, b);
42166 #[rustfmt::skip]
42167 let e = _mm512_setr_ps(
42168 -1., 0., -2., f32::MAX - 1.,
42169 f32::MIN, 99., -101., -33.,
42170 0., 0., 0., 0.,
42171 0., 0., 0., 0.,
42172 );
42173 assert_eq_m512(r, e);
42174 }
42175
42176 #[simd_test(enable = "avx512f,avx512vl")]
42177 unsafe fn test_mm256_mask_sub_ps() {
42178 let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
42179 let b = _mm256_set1_ps(1.);
42180 let r = _mm256_mask_sub_ps(a, 0, a, b);
42181 assert_eq_m256(r, a);
42182 let r = _mm256_mask_sub_ps(a, 0b11111111, a, b);
42183 let e = _mm256_set_ps(-1., 0., -2., f32::MAX - 1., f32::MIN, 99., -101., -33.);
42184 assert_eq_m256(r, e);
42185 }
42186
42187 #[simd_test(enable = "avx512f,avx512vl")]
42188 unsafe fn test_mm256_maskz_sub_ps() {
42189 let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
42190 let b = _mm256_set1_ps(1.);
42191 let r = _mm256_maskz_sub_ps(0, a, b);
42192 assert_eq_m256(r, _mm256_setzero_ps());
42193 let r = _mm256_maskz_sub_ps(0b11111111, a, b);
42194 let e = _mm256_set_ps(-1., 0., -2., f32::MAX - 1., f32::MIN, 99., -101., -33.);
42195 assert_eq_m256(r, e);
42196 }
42197
42198 #[simd_test(enable = "avx512f,avx512vl")]
42199 unsafe fn test_mm_mask_sub_ps() {
42200 let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
42201 let b = _mm_set1_ps(1.);
42202 let r = _mm_mask_sub_ps(a, 0, a, b);
42203 assert_eq_m128(r, a);
42204 let r = _mm_mask_sub_ps(a, 0b00001111, a, b);
42205 let e = _mm_set_ps(0., -2., f32::MAX - 1., f32::MIN);
42206 assert_eq_m128(r, e);
42207 }
42208
42209 #[simd_test(enable = "avx512f,avx512vl")]
42210 unsafe fn test_mm_maskz_sub_ps() {
42211 let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
42212 let b = _mm_set1_ps(1.);
42213 let r = _mm_maskz_sub_ps(0, a, b);
42214 assert_eq_m128(r, _mm_setzero_ps());
42215 let r = _mm_maskz_sub_ps(0b00001111, a, b);
42216 let e = _mm_set_ps(0., -2., f32::MAX - 1., f32::MIN);
42217 assert_eq_m128(r, e);
42218 }
42219
42220 #[simd_test(enable = "avx512f")]
42221 unsafe fn test_mm512_mullo_epi32() {
42222 #[rustfmt::skip]
42223 let a = _mm512_setr_epi32(
42224 0, 1, -1, i32::MAX,
42225 i32::MIN, 100, -100, -32,
42226 0, 1, -1, i32::MAX,
42227 i32::MIN, 100, -100, -32,
42228 );
42229 let b = _mm512_set1_epi32(2);
42230 let r = _mm512_mullo_epi32(a, b);
42231 let e = _mm512_setr_epi32(
42232 0, 2, -2, -2, 0, 200, -200, -64, 0, 2, -2, -2, 0, 200, -200, -64,
42233 );
42234 assert_eq_m512i(r, e);
42235 }
42236
42237 #[simd_test(enable = "avx512f")]
42238 unsafe fn test_mm512_mask_mullo_epi32() {
42239 #[rustfmt::skip]
42240 let a = _mm512_setr_epi32(
42241 0, 1, -1, i32::MAX,
42242 i32::MIN, 100, -100, -32,
42243 0, 1, -1, i32::MAX,
42244 i32::MIN, 100, -100, -32,
42245 );
42246 let b = _mm512_set1_epi32(2);
42247 let r = _mm512_mask_mullo_epi32(a, 0, a, b);
42248 assert_eq_m512i(r, a);
42249 let r = _mm512_mask_mullo_epi32(a, 0b00000000_11111111, a, b);
42250 #[rustfmt::skip]
42251 let e = _mm512_setr_epi32(
42252 0, 2, -2, -2,
42253 0, 200, -200, -64,
42254 0, 1, -1, i32::MAX,
42255 i32::MIN, 100, -100, -32,
42256 );
42257 assert_eq_m512i(r, e);
42258 }
42259
42260 #[simd_test(enable = "avx512f")]
42261 unsafe fn test_mm512_maskz_mullo_epi32() {
42262 #[rustfmt::skip]
42263 let a = _mm512_setr_epi32(
42264 0, 1, -1, i32::MAX,
42265 i32::MIN, 100, -100, -32,
42266 0, 1, -1, i32::MAX,
42267 i32::MIN, 100, -100, -32,
42268 );
42269 let b = _mm512_set1_epi32(2);
42270 let r = _mm512_maskz_mullo_epi32(0, a, b);
42271 assert_eq_m512i(r, _mm512_setzero_si512());
42272 let r = _mm512_maskz_mullo_epi32(0b00000000_11111111, a, b);
42273 let e = _mm512_setr_epi32(0, 2, -2, -2, 0, 200, -200, -64, 0, 0, 0, 0, 0, 0, 0, 0);
42274 assert_eq_m512i(r, e);
42275 }
42276
42277 #[simd_test(enable = "avx512f,avx512vl")]
42278 unsafe fn test_mm256_mask_mullo_epi32() {
42279 let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
42280 let b = _mm256_set1_epi32(2);
42281 let r = _mm256_mask_mullo_epi32(a, 0, a, b);
42282 assert_eq_m256i(r, a);
42283 let r = _mm256_mask_mullo_epi32(a, 0b11111111, a, b);
42284 let e = _mm256_set_epi32(0, 2, -2, -2, 0, 200, -200, -64);
42285 assert_eq_m256i(r, e);
42286 }
42287
42288 #[simd_test(enable = "avx512f,avx512vl")]
42289 unsafe fn test_mm256_maskz_mullo_epi32() {
42290 let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
42291 let b = _mm256_set1_epi32(2);
42292 let r = _mm256_maskz_mullo_epi32(0, a, b);
42293 assert_eq_m256i(r, _mm256_setzero_si256());
42294 let r = _mm256_maskz_mullo_epi32(0b11111111, a, b);
42295 let e = _mm256_set_epi32(0, 2, -2, -2, 0, 200, -200, -64);
42296 assert_eq_m256i(r, e);
42297 }
42298
42299 #[simd_test(enable = "avx512f,avx512vl")]
42300 unsafe fn test_mm_mask_mullo_epi32() {
42301 let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
42302 let b = _mm_set1_epi32(2);
42303 let r = _mm_mask_mullo_epi32(a, 0, a, b);
42304 assert_eq_m128i(r, a);
42305 let r = _mm_mask_mullo_epi32(a, 0b00001111, a, b);
42306 let e = _mm_set_epi32(2, -2, -2, 0);
42307 assert_eq_m128i(r, e);
42308 }
42309
42310 #[simd_test(enable = "avx512f,avx512vl")]
42311 unsafe fn test_mm_maskz_mullo_epi32() {
42312 let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
42313 let b = _mm_set1_epi32(2);
42314 let r = _mm_maskz_mullo_epi32(0, a, b);
42315 assert_eq_m128i(r, _mm_setzero_si128());
42316 let r = _mm_maskz_mullo_epi32(0b00001111, a, b);
42317 let e = _mm_set_epi32(2, -2, -2, 0);
42318 assert_eq_m128i(r, e);
42319 }
42320
42321 #[simd_test(enable = "avx512f")]
42322 unsafe fn test_mm512_mul_ps() {
42323 #[rustfmt::skip]
42324 let a = _mm512_setr_ps(
42325 0., 1., -1., f32::MAX,
42326 f32::MIN, 100., -100., -32.,
42327 0., 1., -1., f32::MAX,
42328 f32::MIN, 100., -100., -32.,
42329 );
42330 let b = _mm512_set1_ps(2.);
42331 let r = _mm512_mul_ps(a, b);
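        // f32::MAX * 2. and f32::MIN * 2. overflow to positive and negative infinity respectively.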
42332 #[rustfmt::skip]
42333 let e = _mm512_setr_ps(
42334 0., 2., -2., f32::INFINITY,
42335 f32::NEG_INFINITY, 200., -200., -64.,
42336 0., 2., -2., f32::INFINITY,
            f32::NEG_INFINITY, 200., -200., -64.,
42339 );
42340 assert_eq_m512(r, e);
42341 }
42342
42343 #[simd_test(enable = "avx512f")]
42344 unsafe fn test_mm512_mask_mul_ps() {
42345 #[rustfmt::skip]
42346 let a = _mm512_setr_ps(
42347 0., 1., -1., f32::MAX,
42348 f32::MIN, 100., -100., -32.,
42349 0., 1., -1., f32::MAX,
42350 f32::MIN, 100., -100., -32.,
42351 );
42352 let b = _mm512_set1_ps(2.);
42353 let r = _mm512_mask_mul_ps(a, 0, a, b);
42354 assert_eq_m512(r, a);
42355 let r = _mm512_mask_mul_ps(a, 0b00000000_11111111, a, b);
42356 #[rustfmt::skip]
42357 let e = _mm512_setr_ps(
42358 0., 2., -2., f32::INFINITY,
42359 f32::NEG_INFINITY, 200., -200., -64.,
42360 0., 1., -1., f32::MAX,
42361 f32::MIN, 100., -100., -32.,
42362 );
42363 assert_eq_m512(r, e);
42364 }
42365
42366 #[simd_test(enable = "avx512f")]
42367 unsafe fn test_mm512_maskz_mul_ps() {
42368 #[rustfmt::skip]
42369 let a = _mm512_setr_ps(
42370 0., 1., -1., f32::MAX,
42371 f32::MIN, 100., -100., -32.,
42372 0., 1., -1., f32::MAX,
42373 f32::MIN, 100., -100., -32.,
42374 );
42375 let b = _mm512_set1_ps(2.);
42376 let r = _mm512_maskz_mul_ps(0, a, b);
42377 assert_eq_m512(r, _mm512_setzero_ps());
42378 let r = _mm512_maskz_mul_ps(0b00000000_11111111, a, b);
42379 #[rustfmt::skip]
42380 let e = _mm512_setr_ps(
42381 0., 2., -2., f32::INFINITY,
42382 f32::NEG_INFINITY, 200., -200., -64.,
42383 0., 0., 0., 0.,
42384 0., 0., 0., 0.,
42385 );
42386 assert_eq_m512(r, e);
42387 }
42388
42389 #[simd_test(enable = "avx512f,avx512vl")]
42390 unsafe fn test_mm256_mask_mul_ps() {
42391 let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
42392 let b = _mm256_set1_ps(2.);
42393 let r = _mm256_mask_mul_ps(a, 0, a, b);
42394 assert_eq_m256(r, a);
42395 let r = _mm256_mask_mul_ps(a, 0b11111111, a, b);
42396 #[rustfmt::skip]
42397 let e = _mm256_set_ps(
42398 0., 2., -2., f32::INFINITY,
42399 f32::NEG_INFINITY, 200., -200., -64.,
42400 );
42401 assert_eq_m256(r, e);
42402 }
42403
42404 #[simd_test(enable = "avx512f,avx512vl")]
42405 unsafe fn test_mm256_maskz_mul_ps() {
42406 let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
42407 let b = _mm256_set1_ps(2.);
42408 let r = _mm256_maskz_mul_ps(0, a, b);
42409 assert_eq_m256(r, _mm256_setzero_ps());
42410 let r = _mm256_maskz_mul_ps(0b11111111, a, b);
42411 #[rustfmt::skip]
42412 let e = _mm256_set_ps(
42413 0., 2., -2., f32::INFINITY,
42414 f32::NEG_INFINITY, 200., -200., -64.,
42415 );
42416 assert_eq_m256(r, e);
42417 }
42418
42419 #[simd_test(enable = "avx512f,avx512vl")]
42420 unsafe fn test_mm_mask_mul_ps() {
42421 let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
42422 let b = _mm_set1_ps(2.);
42423 let r = _mm_mask_mul_ps(a, 0, a, b);
42424 assert_eq_m128(r, a);
42425 let r = _mm_mask_mul_ps(a, 0b00001111, a, b);
42426 let e = _mm_set_ps(2., -2., f32::INFINITY, f32::NEG_INFINITY);
42427 assert_eq_m128(r, e);
42428 }
42429
42430 #[simd_test(enable = "avx512f,avx512vl")]
42431 unsafe fn test_mm_maskz_mul_ps() {
42432 let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
42433 let b = _mm_set1_ps(2.);
42434 let r = _mm_maskz_mul_ps(0, a, b);
42435 assert_eq_m128(r, _mm_setzero_ps());
42436 let r = _mm_maskz_mul_ps(0b00001111, a, b);
42437 let e = _mm_set_ps(2., -2., f32::INFINITY, f32::NEG_INFINITY);
42438 assert_eq_m128(r, e);
42439 }
42440
42441 #[simd_test(enable = "avx512f")]
42442 unsafe fn test_mm512_div_ps() {
42443 let a = _mm512_setr_ps(
42444 0., 1., -1., -2., 100., 100., -100., -32., 0., 1., -1., 1000., -131., 100., -100., -32.,
42445 );
42446 let b = _mm512_setr_ps(
42447 2., 2., 2., 2., 2., 0., 2., 2., 2., 2., 2., 2., 0., 2., 2., 2.,
42448 );
42449 let r = _mm512_div_ps(a, b);
42450 #[rustfmt::skip]
42451 let e = _mm512_setr_ps(
42452 0., 0.5, -0.5, -1.,
42453 50., f32::INFINITY, -50., -16.,
42454 0., 0.5, -0.5, 500.,
42455 f32::NEG_INFINITY, 50., -50., -16.,
42456 );
        assert_eq_m512(r, e); // 100. / 0. == +inf and -131. / 0. == -inf
42458 }
42459
42460 #[simd_test(enable = "avx512f")]
42461 unsafe fn test_mm512_mask_div_ps() {
42462 let a = _mm512_setr_ps(
42463 0., 1., -1., -2., 100., 100., -100., -32., 0., 1., -1., 1000., -131., 100., -100., -32.,
42464 );
42465 let b = _mm512_setr_ps(
42466 2., 2., 2., 2., 2., 0., 2., 2., 2., 2., 2., 2., 0., 2., 2., 2.,
42467 );
42468 let r = _mm512_mask_div_ps(a, 0, a, b);
42469 assert_eq_m512(r, a);
42470 let r = _mm512_mask_div_ps(a, 0b00000000_11111111, a, b);
42471 #[rustfmt::skip]
42472 let e = _mm512_setr_ps(
42473 0., 0.5, -0.5, -1.,
42474 50., f32::INFINITY, -50., -16.,
42475 0., 1., -1., 1000.,
42476 -131., 100., -100., -32.,
42477 );
42478 assert_eq_m512(r, e);
42479 }
42480
42481 #[simd_test(enable = "avx512f")]
42482 unsafe fn test_mm512_maskz_div_ps() {
42483 let a = _mm512_setr_ps(
42484 0., 1., -1., -2., 100., 100., -100., -32., 0., 1., -1., 1000., -131., 100., -100., -32.,
42485 );
42486 let b = _mm512_setr_ps(
42487 2., 2., 2., 2., 2., 0., 2., 2., 2., 2., 2., 2., 0., 2., 2., 2.,
42488 );
42489 let r = _mm512_maskz_div_ps(0, a, b);
42490 assert_eq_m512(r, _mm512_setzero_ps());
42491 let r = _mm512_maskz_div_ps(0b00000000_11111111, a, b);
42492 #[rustfmt::skip]
42493 let e = _mm512_setr_ps(
42494 0., 0.5, -0.5, -1.,
42495 50., f32::INFINITY, -50., -16.,
42496 0., 0., 0., 0.,
42497 0., 0., 0., 0.,
42498 );
42499 assert_eq_m512(r, e);
42500 }
42501
42502 #[simd_test(enable = "avx512f,avx512vl")]
42503 unsafe fn test_mm256_mask_div_ps() {
42504 let a = _mm256_set_ps(0., 1., -1., -2., 100., 100., -100., -32.);
42505 let b = _mm256_set_ps(2., 2., 2., 2., 2., 0., 2., 2.);
42506 let r = _mm256_mask_div_ps(a, 0, a, b);
42507 assert_eq_m256(r, a);
42508 let r = _mm256_mask_div_ps(a, 0b11111111, a, b);
42509 let e = _mm256_set_ps(0., 0.5, -0.5, -1., 50., f32::INFINITY, -50., -16.);
42510 assert_eq_m256(r, e);
42511 }
42512
42513 #[simd_test(enable = "avx512f,avx512vl")]
42514 unsafe fn test_mm256_maskz_div_ps() {
42515 let a = _mm256_set_ps(0., 1., -1., -2., 100., 100., -100., -32.);
42516 let b = _mm256_set_ps(2., 2., 2., 2., 2., 0., 2., 2.);
42517 let r = _mm256_maskz_div_ps(0, a, b);
42518 assert_eq_m256(r, _mm256_setzero_ps());
42519 let r = _mm256_maskz_div_ps(0b11111111, a, b);
42520 let e = _mm256_set_ps(0., 0.5, -0.5, -1., 50., f32::INFINITY, -50., -16.);
42521 assert_eq_m256(r, e);
42522 }
42523
42524 #[simd_test(enable = "avx512f,avx512vl")]
42525 unsafe fn test_mm_mask_div_ps() {
42526 let a = _mm_set_ps(100., 100., -100., -32.);
42527 let b = _mm_set_ps(2., 0., 2., 2.);
42528 let r = _mm_mask_div_ps(a, 0, a, b);
42529 assert_eq_m128(r, a);
42530 let r = _mm_mask_div_ps(a, 0b00001111, a, b);
42531 let e = _mm_set_ps(50., f32::INFINITY, -50., -16.);
42532 assert_eq_m128(r, e);
42533 }
42534
42535 #[simd_test(enable = "avx512f,avx512vl")]
42536 unsafe fn test_mm_maskz_div_ps() {
42537 let a = _mm_set_ps(100., 100., -100., -32.);
42538 let b = _mm_set_ps(2., 0., 2., 2.);
42539 let r = _mm_maskz_div_ps(0, a, b);
42540 assert_eq_m128(r, _mm_setzero_ps());
42541 let r = _mm_maskz_div_ps(0b00001111, a, b);
42542 let e = _mm_set_ps(50., f32::INFINITY, -50., -16.);
42543 assert_eq_m128(r, e);
42544 }
42545
42546 #[simd_test(enable = "avx512f")]
42547 unsafe fn test_mm512_max_epi32() {
42548 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
42549 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
42550 let r = _mm512_max_epi32(a, b);
42551 let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
42552 assert_eq_m512i(r, e);
42553 }
42554
42555 #[simd_test(enable = "avx512f")]
42556 unsafe fn test_mm512_mask_max_epi32() {
42557 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
42558 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
42559 let r = _mm512_mask_max_epi32(a, 0, a, b);
42560 assert_eq_m512i(r, a);
42561 let r = _mm512_mask_max_epi32(a, 0b00000000_11111111, a, b);
42562 let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
42563 assert_eq_m512i(r, e);
42564 }
42565
42566 #[simd_test(enable = "avx512f")]
42567 unsafe fn test_mm512_maskz_max_epi32() {
42568 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
42569 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
42570 let r = _mm512_maskz_max_epi32(0, a, b);
42571 assert_eq_m512i(r, _mm512_setzero_si512());
42572 let r = _mm512_maskz_max_epi32(0b00000000_11111111, a, b);
42573 let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 0, 0, 0, 0, 0, 0, 0, 0);
42574 assert_eq_m512i(r, e);
42575 }
42576
42577 #[simd_test(enable = "avx512f,avx512vl")]
42578 unsafe fn test_mm256_mask_max_epi32() {
42579 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
42580 let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
42581 let r = _mm256_mask_max_epi32(a, 0, a, b);
42582 assert_eq_m256i(r, a);
42583 let r = _mm256_mask_max_epi32(a, 0b11111111, a, b);
42584 let e = _mm256_set_epi32(7, 6, 5, 4, 4, 5, 6, 7);
42585 assert_eq_m256i(r, e);
42586 }
42587
42588 #[simd_test(enable = "avx512f,avx512vl")]
42589 unsafe fn test_mm256_maskz_max_epi32() {
42590 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
42591 let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
42592 let r = _mm256_maskz_max_epi32(0, a, b);
42593 assert_eq_m256i(r, _mm256_setzero_si256());
42594 let r = _mm256_maskz_max_epi32(0b11111111, a, b);
42595 let e = _mm256_set_epi32(7, 6, 5, 4, 4, 5, 6, 7);
42596 assert_eq_m256i(r, e);
42597 }
42598
42599 #[simd_test(enable = "avx512f,avx512vl")]
42600 unsafe fn test_mm_mask_max_epi32() {
42601 let a = _mm_set_epi32(0, 1, 2, 3);
42602 let b = _mm_set_epi32(3, 2, 1, 0);
42603 let r = _mm_mask_max_epi32(a, 0, a, b);
42604 assert_eq_m128i(r, a);
42605 let r = _mm_mask_max_epi32(a, 0b00001111, a, b);
42606 let e = _mm_set_epi32(3, 2, 2, 3);
42607 assert_eq_m128i(r, e);
42608 }
42609
42610 #[simd_test(enable = "avx512f,avx512vl")]
42611 unsafe fn test_mm_maskz_max_epi32() {
42612 let a = _mm_set_epi32(0, 1, 2, 3);
42613 let b = _mm_set_epi32(3, 2, 1, 0);
42614 let r = _mm_maskz_max_epi32(0, a, b);
42615 assert_eq_m128i(r, _mm_setzero_si128());
42616 let r = _mm_maskz_max_epi32(0b00001111, a, b);
42617 let e = _mm_set_epi32(3, 2, 2, 3);
42618 assert_eq_m128i(r, e);
42619 }
42620
42621 #[simd_test(enable = "avx512f")]
42622 unsafe fn test_mm512_max_ps() {
42623 let a = _mm512_setr_ps(
42624 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
42625 );
42626 let b = _mm512_setr_ps(
42627 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
42628 );
42629 let r = _mm512_max_ps(a, b);
42630 let e = _mm512_setr_ps(
42631 15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
42632 );
42633 assert_eq_m512(r, e);
42634 }
42635
42636 #[simd_test(enable = "avx512f")]
42637 unsafe fn test_mm512_mask_max_ps() {
42638 let a = _mm512_setr_ps(
42639 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
42640 );
42641 let b = _mm512_setr_ps(
42642 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
42643 );
42644 let r = _mm512_mask_max_ps(a, 0, a, b);
42645 assert_eq_m512(r, a);
42646 let r = _mm512_mask_max_ps(a, 0b00000000_11111111, a, b);
42647 let e = _mm512_setr_ps(
42648 15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
42649 );
42650 assert_eq_m512(r, e);
42651 }
42652
42653 #[simd_test(enable = "avx512f")]
42654 unsafe fn test_mm512_maskz_max_ps() {
42655 let a = _mm512_setr_ps(
42656 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
42657 );
42658 let b = _mm512_setr_ps(
42659 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
42660 );
42661 let r = _mm512_maskz_max_ps(0, a, b);
42662 assert_eq_m512(r, _mm512_setzero_ps());
42663 let r = _mm512_maskz_max_ps(0b00000000_11111111, a, b);
42664 let e = _mm512_setr_ps(
42665 15., 14., 13., 12., 11., 10., 9., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
42666 );
42667 assert_eq_m512(r, e);
42668 }
42669
42670 #[simd_test(enable = "avx512f,avx512vl")]
42671 unsafe fn test_mm256_mask_max_ps() {
42672 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
42673 let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.);
42674 let r = _mm256_mask_max_ps(a, 0, a, b);
42675 assert_eq_m256(r, a);
42676 let r = _mm256_mask_max_ps(a, 0b11111111, a, b);
42677 let e = _mm256_set_ps(7., 6., 5., 4., 4., 5., 6., 7.);
42678 assert_eq_m256(r, e);
42679 }
42680
42681 #[simd_test(enable = "avx512f,avx512vl")]
42682 unsafe fn test_mm256_maskz_max_ps() {
42683 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
42684 let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.);
42685 let r = _mm256_maskz_max_ps(0, a, b);
42686 assert_eq_m256(r, _mm256_setzero_ps());
42687 let r = _mm256_maskz_max_ps(0b11111111, a, b);
42688 let e = _mm256_set_ps(7., 6., 5., 4., 4., 5., 6., 7.);
42689 assert_eq_m256(r, e);
42690 }
42691
42692 #[simd_test(enable = "avx512f,avx512vl")]
42693 unsafe fn test_mm_mask_max_ps() {
42694 let a = _mm_set_ps(0., 1., 2., 3.);
42695 let b = _mm_set_ps(3., 2., 1., 0.);
42696 let r = _mm_mask_max_ps(a, 0, a, b);
42697 assert_eq_m128(r, a);
42698 let r = _mm_mask_max_ps(a, 0b00001111, a, b);
42699 let e = _mm_set_ps(3., 2., 2., 3.);
42700 assert_eq_m128(r, e);
42701 }
42702
42703 #[simd_test(enable = "avx512f,avx512vl")]
42704 unsafe fn test_mm_maskz_max_ps() {
42705 let a = _mm_set_ps(0., 1., 2., 3.);
42706 let b = _mm_set_ps(3., 2., 1., 0.);
42707 let r = _mm_maskz_max_ps(0, a, b);
42708 assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_max_ps(0b00001111, a, b);
42710 let e = _mm_set_ps(3., 2., 2., 3.);
42711 assert_eq_m128(r, e);
42712 }
42713
42714 #[simd_test(enable = "avx512f")]
42715 unsafe fn test_mm512_max_epu32() {
42716 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
42717 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
42718 let r = _mm512_max_epu32(a, b);
42719 let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
42720 assert_eq_m512i(r, e);
42721 }
42722
42723 #[simd_test(enable = "avx512f")]
42724 unsafe fn test_mm512_mask_max_epu32() {
42725 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
42726 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
42727 let r = _mm512_mask_max_epu32(a, 0, a, b);
42728 assert_eq_m512i(r, a);
42729 let r = _mm512_mask_max_epu32(a, 0b00000000_11111111, a, b);
42730 let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
42731 assert_eq_m512i(r, e);
42732 }
42733
42734 #[simd_test(enable = "avx512f")]
42735 unsafe fn test_mm512_maskz_max_epu32() {
42736 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
42737 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
42738 let r = _mm512_maskz_max_epu32(0, a, b);
42739 assert_eq_m512i(r, _mm512_setzero_si512());
42740 let r = _mm512_maskz_max_epu32(0b00000000_11111111, a, b);
42741 let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 0, 0, 0, 0, 0, 0, 0, 0);
42742 assert_eq_m512i(r, e);
42743 }
42744
42745 #[simd_test(enable = "avx512f,avx512vl")]
42746 unsafe fn test_mm256_mask_max_epu32() {
42747 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
42748 let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
42749 let r = _mm256_mask_max_epu32(a, 0, a, b);
42750 assert_eq_m256i(r, a);
42751 let r = _mm256_mask_max_epu32(a, 0b11111111, a, b);
42752 let e = _mm256_set_epi32(7, 6, 5, 4, 4, 5, 6, 7);
42753 assert_eq_m256i(r, e);
42754 }
42755
42756 #[simd_test(enable = "avx512f,avx512vl")]
42757 unsafe fn test_mm256_maskz_max_epu32() {
42758 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
42759 let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
42760 let r = _mm256_maskz_max_epu32(0, a, b);
42761 assert_eq_m256i(r, _mm256_setzero_si256());
42762 let r = _mm256_maskz_max_epu32(0b11111111, a, b);
42763 let e = _mm256_set_epi32(7, 6, 5, 4, 4, 5, 6, 7);
42764 assert_eq_m256i(r, e);
42765 }
42766
42767 #[simd_test(enable = "avx512f,avx512vl")]
42768 unsafe fn test_mm_mask_max_epu32() {
42769 let a = _mm_set_epi32(0, 1, 2, 3);
42770 let b = _mm_set_epi32(3, 2, 1, 0);
42771 let r = _mm_mask_max_epu32(a, 0, a, b);
42772 assert_eq_m128i(r, a);
42773 let r = _mm_mask_max_epu32(a, 0b00001111, a, b);
42774 let e = _mm_set_epi32(3, 2, 2, 3);
42775 assert_eq_m128i(r, e);
42776 }
42777
42778 #[simd_test(enable = "avx512f,avx512vl")]
42779 unsafe fn test_mm_maskz_max_epu32() {
42780 let a = _mm_set_epi32(0, 1, 2, 3);
42781 let b = _mm_set_epi32(3, 2, 1, 0);
42782 let r = _mm_maskz_max_epu32(0, a, b);
42783 assert_eq_m128i(r, _mm_setzero_si128());
42784 let r = _mm_maskz_max_epu32(0b00001111, a, b);
42785 let e = _mm_set_epi32(3, 2, 2, 3);
42786 assert_eq_m128i(r, e);
42787 }
42788
42789 #[simd_test(enable = "avx512f")]
42790 unsafe fn test_mm512_min_epi32() {
42791 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
42792 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
42793 let r = _mm512_min_epi32(a, b);
42794 let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
42795 assert_eq_m512i(r, e);
42796 }
42797
42798 #[simd_test(enable = "avx512f")]
42799 unsafe fn test_mm512_mask_min_epi32() {
42800 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
42801 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
42802 let r = _mm512_mask_min_epi32(a, 0, a, b);
42803 assert_eq_m512i(r, a);
42804 let r = _mm512_mask_min_epi32(a, 0b00000000_11111111, a, b);
42805 let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
42806 assert_eq_m512i(r, e);
42807 }
42808
42809 #[simd_test(enable = "avx512f")]
42810 unsafe fn test_mm512_maskz_min_epi32() {
42811 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
42812 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
42813 let r = _mm512_maskz_min_epi32(0, a, b);
42814 assert_eq_m512i(r, _mm512_setzero_si512());
42815 let r = _mm512_maskz_min_epi32(0b00000000_11111111, a, b);
42816 let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0);
42817 assert_eq_m512i(r, e);
42818 }
42819
42820 #[simd_test(enable = "avx512f,avx512vl")]
42821 unsafe fn test_mm256_mask_min_epi32() {
42822 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
42823 let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
42824 let r = _mm256_mask_min_epi32(a, 0, a, b);
42825 assert_eq_m256i(r, a);
42826 let r = _mm256_mask_min_epi32(a, 0b11111111, a, b);
42827 let e = _mm256_set_epi32(0, 1, 2, 3, 3, 2, 1, 0);
42828 assert_eq_m256i(r, e);
42829 }
42830
42831 #[simd_test(enable = "avx512f,avx512vl")]
42832 unsafe fn test_mm256_maskz_min_epi32() {
42833 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
42834 let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
42835 let r = _mm256_maskz_min_epi32(0, a, b);
42836 assert_eq_m256i(r, _mm256_setzero_si256());
42837 let r = _mm256_maskz_min_epi32(0b11111111, a, b);
42838 let e = _mm256_set_epi32(0, 1, 2, 3, 3, 2, 1, 0);
42839 assert_eq_m256i(r, e);
42840 }
42841
42842 #[simd_test(enable = "avx512f,avx512vl")]
42843 unsafe fn test_mm_mask_min_epi32() {
42844 let a = _mm_set_epi32(0, 1, 2, 3);
42845 let b = _mm_set_epi32(3, 2, 1, 0);
42846 let r = _mm_mask_min_epi32(a, 0, a, b);
42847 assert_eq_m128i(r, a);
42848 let r = _mm_mask_min_epi32(a, 0b00001111, a, b);
42849 let e = _mm_set_epi32(0, 1, 1, 0);
42850 assert_eq_m128i(r, e);
42851 }
42852
42853 #[simd_test(enable = "avx512f,avx512vl")]
42854 unsafe fn test_mm_maskz_min_epi32() {
42855 let a = _mm_set_epi32(0, 1, 2, 3);
42856 let b = _mm_set_epi32(3, 2, 1, 0);
42857 let r = _mm_maskz_min_epi32(0, a, b);
42858 assert_eq_m128i(r, _mm_setzero_si128());
42859 let r = _mm_maskz_min_epi32(0b00001111, a, b);
42860 let e = _mm_set_epi32(0, 1, 1, 0);
42861 assert_eq_m128i(r, e);
42862 }
42863
42864 #[simd_test(enable = "avx512f")]
42865 unsafe fn test_mm512_min_ps() {
42866 let a = _mm512_setr_ps(
42867 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
42868 );
42869 let b = _mm512_setr_ps(
42870 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
42871 );
42872 let r = _mm512_min_ps(a, b);
42873 let e = _mm512_setr_ps(
42874 0., 1., 2., 3., 4., 5., 6., 7., 7., 6., 5., 4., 3., 2., 1., 0.,
42875 );
42876 assert_eq_m512(r, e);
42877 }
42878
42879 #[simd_test(enable = "avx512f")]
42880 unsafe fn test_mm512_mask_min_ps() {
42881 let a = _mm512_setr_ps(
42882 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
42883 );
42884 let b = _mm512_setr_ps(
42885 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
42886 );
42887 let r = _mm512_mask_min_ps(a, 0, a, b);
42888 assert_eq_m512(r, a);
42889 let r = _mm512_mask_min_ps(a, 0b00000000_11111111, a, b);
42890 let e = _mm512_setr_ps(
42891 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
42892 );
42893 assert_eq_m512(r, e);
42894 }
42895
42896 #[simd_test(enable = "avx512f")]
42897 unsafe fn test_mm512_maskz_min_ps() {
42898 let a = _mm512_setr_ps(
42899 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
42900 );
42901 let b = _mm512_setr_ps(
42902 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
42903 );
42904 let r = _mm512_maskz_min_ps(0, a, b);
42905 assert_eq_m512(r, _mm512_setzero_ps());
42906 let r = _mm512_maskz_min_ps(0b00000000_11111111, a, b);
42907 let e = _mm512_setr_ps(
42908 0., 1., 2., 3., 4., 5., 6., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
42909 );
42910 assert_eq_m512(r, e);
42911 }
42912
42913 #[simd_test(enable = "avx512f,avx512vl")]
42914 unsafe fn test_mm256_mask_min_ps() {
42915 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
42916 let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.);
42917 let r = _mm256_mask_min_ps(a, 0, a, b);
42918 assert_eq_m256(r, a);
42919 let r = _mm256_mask_min_ps(a, 0b11111111, a, b);
42920 let e = _mm256_set_ps(0., 1., 2., 3., 3., 2., 1., 0.);
42921 assert_eq_m256(r, e);
42922 }
42923
42924 #[simd_test(enable = "avx512f,avx512vl")]
42925 unsafe fn test_mm256_maskz_min_ps() {
42926 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
42927 let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.);
42928 let r = _mm256_maskz_min_ps(0, a, b);
42929 assert_eq_m256(r, _mm256_setzero_ps());
42930 let r = _mm256_maskz_min_ps(0b11111111, a, b);
42931 let e = _mm256_set_ps(0., 1., 2., 3., 3., 2., 1., 0.);
42932 assert_eq_m256(r, e);
42933 }
42934
42935 #[simd_test(enable = "avx512f,avx512vl")]
42936 unsafe fn test_mm_mask_min_ps() {
42937 let a = _mm_set_ps(0., 1., 2., 3.);
42938 let b = _mm_set_ps(3., 2., 1., 0.);
42939 let r = _mm_mask_min_ps(a, 0, a, b);
42940 assert_eq_m128(r, a);
42941 let r = _mm_mask_min_ps(a, 0b00001111, a, b);
42942 let e = _mm_set_ps(0., 1., 1., 0.);
42943 assert_eq_m128(r, e);
42944 }
42945
42946 #[simd_test(enable = "avx512f,avx512vl")]
42947 unsafe fn test_mm_maskz_min_ps() {
42948 let a = _mm_set_ps(0., 1., 2., 3.);
42949 let b = _mm_set_ps(3., 2., 1., 0.);
42950 let r = _mm_maskz_min_ps(0, a, b);
42951 assert_eq_m128(r, _mm_setzero_ps());
42952 let r = _mm_maskz_min_ps(0b00001111, a, b);
42953 let e = _mm_set_ps(0., 1., 1., 0.);
42954 assert_eq_m128(r, e);
42955 }
42956
42957 #[simd_test(enable = "avx512f")]
42958 unsafe fn test_mm512_min_epu32() {
42959 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
42960 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
42961 let r = _mm512_min_epu32(a, b);
42962 let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
42963 assert_eq_m512i(r, e);
42964 }
42965
42966 #[simd_test(enable = "avx512f")]
42967 unsafe fn test_mm512_mask_min_epu32() {
42968 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
42969 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
42970 let r = _mm512_mask_min_epu32(a, 0, a, b);
42971 assert_eq_m512i(r, a);
42972 let r = _mm512_mask_min_epu32(a, 0b00000000_11111111, a, b);
42973 let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
42974 assert_eq_m512i(r, e);
42975 }
42976
42977 #[simd_test(enable = "avx512f")]
42978 unsafe fn test_mm512_maskz_min_epu32() {
42979 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
42980 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
42981 let r = _mm512_maskz_min_epu32(0, a, b);
42982 assert_eq_m512i(r, _mm512_setzero_si512());
42983 let r = _mm512_maskz_min_epu32(0b00000000_11111111, a, b);
42984 let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0);
42985 assert_eq_m512i(r, e);
42986 }
42987
42988 #[simd_test(enable = "avx512f,avx512vl")]
42989 unsafe fn test_mm256_mask_min_epu32() {
42990 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
42991 let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
42992 let r = _mm256_mask_min_epu32(a, 0, a, b);
42993 assert_eq_m256i(r, a);
42994 let r = _mm256_mask_min_epu32(a, 0b11111111, a, b);
42995 let e = _mm256_set_epi32(0, 1, 2, 3, 3, 2, 1, 0);
42996 assert_eq_m256i(r, e);
42997 }
42998
42999 #[simd_test(enable = "avx512f,avx512vl")]
43000 unsafe fn test_mm256_maskz_min_epu32() {
43001 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
43002 let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
43003 let r = _mm256_maskz_min_epu32(0, a, b);
43004 assert_eq_m256i(r, _mm256_setzero_si256());
43005 let r = _mm256_maskz_min_epu32(0b11111111, a, b);
43006 let e = _mm256_set_epi32(0, 1, 2, 3, 3, 2, 1, 0);
43007 assert_eq_m256i(r, e);
43008 }
43009
43010 #[simd_test(enable = "avx512f,avx512vl")]
43011 unsafe fn test_mm_mask_min_epu32() {
43012 let a = _mm_set_epi32(0, 1, 2, 3);
43013 let b = _mm_set_epi32(3, 2, 1, 0);
43014 let r = _mm_mask_min_epu32(a, 0, a, b);
43015 assert_eq_m128i(r, a);
43016 let r = _mm_mask_min_epu32(a, 0b00001111, a, b);
43017 let e = _mm_set_epi32(0, 1, 1, 0);
43018 assert_eq_m128i(r, e);
43019 }
43020
43021 #[simd_test(enable = "avx512f,avx512vl")]
43022 unsafe fn test_mm_maskz_min_epu32() {
43023 let a = _mm_set_epi32(0, 1, 2, 3);
43024 let b = _mm_set_epi32(3, 2, 1, 0);
43025 let r = _mm_maskz_min_epu32(0, a, b);
43026 assert_eq_m128i(r, _mm_setzero_si128());
43027 let r = _mm_maskz_min_epu32(0b00001111, a, b);
43028 let e = _mm_set_epi32(0, 1, 1, 0);
43029 assert_eq_m128i(r, e);
43030 }
43031
43032 #[simd_test(enable = "avx512f")]
43033 unsafe fn test_mm512_sqrt_ps() {
43034 let a = _mm512_setr_ps(
43035 0., 1., 4., 9., 16., 25., 36., 49., 64., 81., 100., 121., 144., 169., 196., 225.,
43036 );
43037 let r = _mm512_sqrt_ps(a);
43038 let e = _mm512_setr_ps(
43039 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
43040 );
43041 assert_eq_m512(r, e);
43042 }
43043
43044 #[simd_test(enable = "avx512f")]
43045 unsafe fn test_mm512_mask_sqrt_ps() {
43046 let a = _mm512_setr_ps(
43047 0., 1., 4., 9., 16., 25., 36., 49., 64., 81., 100., 121., 144., 169., 196., 225.,
43048 );
43049 let r = _mm512_mask_sqrt_ps(a, 0, a);
43050 assert_eq_m512(r, a);
43051 let r = _mm512_mask_sqrt_ps(a, 0b00000000_11111111, a);
43052 let e = _mm512_setr_ps(
43053 0., 1., 2., 3., 4., 5., 6., 7., 64., 81., 100., 121., 144., 169., 196., 225.,
43054 );
43055 assert_eq_m512(r, e);
43056 }
43057
43058 #[simd_test(enable = "avx512f")]
43059 unsafe fn test_mm512_maskz_sqrt_ps() {
43060 let a = _mm512_setr_ps(
43061 0., 1., 4., 9., 16., 25., 36., 49., 64., 81., 100., 121., 144., 169., 196., 225.,
43062 );
43063 let r = _mm512_maskz_sqrt_ps(0, a);
43064 assert_eq_m512(r, _mm512_setzero_ps());
43065 let r = _mm512_maskz_sqrt_ps(0b00000000_11111111, a);
43066 let e = _mm512_setr_ps(
43067 0., 1., 2., 3., 4., 5., 6., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
43068 );
43069 assert_eq_m512(r, e);
43070 }
43071
43072 #[simd_test(enable = "avx512f,avx512vl")]
43073 unsafe fn test_mm256_mask_sqrt_ps() {
43074 let a = _mm256_set_ps(0., 1., 4., 9., 16., 25., 36., 49.);
43075 let r = _mm256_mask_sqrt_ps(a, 0, a);
43076 assert_eq_m256(r, a);
43077 let r = _mm256_mask_sqrt_ps(a, 0b11111111, a);
43078 let e = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
43079 assert_eq_m256(r, e);
43080 }
43081
43082 #[simd_test(enable = "avx512f,avx512vl")]
43083 unsafe fn test_mm256_maskz_sqrt_ps() {
43084 let a = _mm256_set_ps(0., 1., 4., 9., 16., 25., 36., 49.);
43085 let r = _mm256_maskz_sqrt_ps(0, a);
43086 assert_eq_m256(r, _mm256_setzero_ps());
43087 let r = _mm256_maskz_sqrt_ps(0b11111111, a);
43088 let e = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
43089 assert_eq_m256(r, e);
43090 }
43091
43092 #[simd_test(enable = "avx512f,avx512vl")]
43093 unsafe fn test_mm_mask_sqrt_ps() {
43094 let a = _mm_set_ps(0., 1., 4., 9.);
43095 let r = _mm_mask_sqrt_ps(a, 0, a);
43096 assert_eq_m128(r, a);
43097 let r = _mm_mask_sqrt_ps(a, 0b00001111, a);
43098 let e = _mm_set_ps(0., 1., 2., 3.);
43099 assert_eq_m128(r, e);
43100 }
43101
43102 #[simd_test(enable = "avx512f,avx512vl")]
43103 unsafe fn test_mm_maskz_sqrt_ps() {
43104 let a = _mm_set_ps(0., 1., 4., 9.);
43105 let r = _mm_maskz_sqrt_ps(0, a);
43106 assert_eq_m128(r, _mm_setzero_ps());
43107 let r = _mm_maskz_sqrt_ps(0b00001111, a);
43108 let e = _mm_set_ps(0., 1., 2., 3.);
43109 assert_eq_m128(r, e);
43110 }
43111
43112 #[simd_test(enable = "avx512f")]
43113 unsafe fn test_mm512_fmadd_ps() {
43114 let a = _mm512_set1_ps(1.);
43115 let b = _mm512_setr_ps(
43116 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
43117 );
43118 let c = _mm512_set1_ps(1.);
43119 let r = _mm512_fmadd_ps(a, b, c);
43120 let e = _mm512_setr_ps(
43121 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
43122 );
43123 assert_eq_m512(r, e);
43124 }
43125
43126 #[simd_test(enable = "avx512f")]
43127 unsafe fn test_mm512_mask_fmadd_ps() {
43128 let a = _mm512_set1_ps(1.);
43129 let b = _mm512_setr_ps(
43130 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
43131 );
43132 let c = _mm512_set1_ps(1.);
43133 let r = _mm512_mask_fmadd_ps(a, 0, b, c);
43134 assert_eq_m512(r, a);
43135 let r = _mm512_mask_fmadd_ps(a, 0b00000000_11111111, b, c);
43136 let e = _mm512_setr_ps(
43137 1., 2., 3., 4., 5., 6., 7., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
43138 );
43139 assert_eq_m512(r, e);
43140 }
43141
43142 #[simd_test(enable = "avx512f")]
43143 unsafe fn test_mm512_maskz_fmadd_ps() {
43144 let a = _mm512_set1_ps(1.);
43145 let b = _mm512_setr_ps(
43146 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
43147 );
43148 let c = _mm512_set1_ps(1.);
43149 let r = _mm512_maskz_fmadd_ps(0, a, b, c);
43150 assert_eq_m512(r, _mm512_setzero_ps());
43151 let r = _mm512_maskz_fmadd_ps(0b00000000_11111111, a, b, c);
43152 let e = _mm512_setr_ps(
43153 1., 2., 3., 4., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
43154 );
43155 assert_eq_m512(r, e);
43156 }
43157
43158 #[simd_test(enable = "avx512f")]
43159 unsafe fn test_mm512_mask3_fmadd_ps() {
43160 let a = _mm512_set1_ps(1.);
43161 let b = _mm512_setr_ps(
43162 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
43163 );
43164 let c = _mm512_set1_ps(2.);
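        // The mask3 variants copy from `c` (not from `a`) wherever a mask bit is clear.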
43165 let r = _mm512_mask3_fmadd_ps(a, b, c, 0);
43166 assert_eq_m512(r, c);
43167 let r = _mm512_mask3_fmadd_ps(a, b, c, 0b00000000_11111111);
43168 let e = _mm512_setr_ps(
43169 2., 3., 4., 5., 6., 7., 8., 9., 2., 2., 2., 2., 2., 2., 2., 2.,
43170 );
43171 assert_eq_m512(r, e);
43172 }
43173
43174 #[simd_test(enable = "avx512f,avx512vl")]
43175 unsafe fn test_mm256_mask_fmadd_ps() {
43176 let a = _mm256_set1_ps(1.);
43177 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
43178 let c = _mm256_set1_ps(1.);
43179 let r = _mm256_mask_fmadd_ps(a, 0, b, c);
43180 assert_eq_m256(r, a);
43181 let r = _mm256_mask_fmadd_ps(a, 0b11111111, b, c);
43182 let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
43183 assert_eq_m256(r, e);
43184 }
43185
43186 #[simd_test(enable = "avx512f,avx512vl")]
43187 unsafe fn test_mm256_maskz_fmadd_ps() {
43188 let a = _mm256_set1_ps(1.);
43189 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
43190 let c = _mm256_set1_ps(1.);
43191 let r = _mm256_maskz_fmadd_ps(0, a, b, c);
43192 assert_eq_m256(r, _mm256_setzero_ps());
43193 let r = _mm256_maskz_fmadd_ps(0b11111111, a, b, c);
43194 let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
43195 assert_eq_m256(r, e);
43196 }
43197
43198 #[simd_test(enable = "avx512f,avx512vl")]
43199 unsafe fn test_mm256_mask3_fmadd_ps() {
43200 let a = _mm256_set1_ps(1.);
43201 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
43202 let c = _mm256_set1_ps(1.);
43203 let r = _mm256_mask3_fmadd_ps(a, b, c, 0);
43204 assert_eq_m256(r, c);
43205 let r = _mm256_mask3_fmadd_ps(a, b, c, 0b11111111);
43206 let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
43207 assert_eq_m256(r, e);
43208 }
43209
43210 #[simd_test(enable = "avx512f,avx512vl")]
43211 unsafe fn test_mm_mask_fmadd_ps() {
43212 let a = _mm_set1_ps(1.);
43213 let b = _mm_set_ps(0., 1., 2., 3.);
43214 let c = _mm_set1_ps(1.);
43215 let r = _mm_mask_fmadd_ps(a, 0, b, c);
43216 assert_eq_m128(r, a);
43217 let r = _mm_mask_fmadd_ps(a, 0b00001111, b, c);
43218 let e = _mm_set_ps(1., 2., 3., 4.);
43219 assert_eq_m128(r, e);
43220 }
43221
43222 #[simd_test(enable = "avx512f,avx512vl")]
43223 unsafe fn test_mm_maskz_fmadd_ps() {
43224 let a = _mm_set1_ps(1.);
43225 let b = _mm_set_ps(0., 1., 2., 3.);
43226 let c = _mm_set1_ps(1.);
43227 let r = _mm_maskz_fmadd_ps(0, a, b, c);
43228 assert_eq_m128(r, _mm_setzero_ps());
43229 let r = _mm_maskz_fmadd_ps(0b00001111, a, b, c);
43230 let e = _mm_set_ps(1., 2., 3., 4.);
43231 assert_eq_m128(r, e);
43232 }
43233
43234 #[simd_test(enable = "avx512f,avx512vl")]
43235 unsafe fn test_mm_mask3_fmadd_ps() {
43236 let a = _mm_set1_ps(1.);
43237 let b = _mm_set_ps(0., 1., 2., 3.);
43238 let c = _mm_set1_ps(1.);
43239 let r = _mm_mask3_fmadd_ps(a, b, c, 0);
43240 assert_eq_m128(r, c);
43241 let r = _mm_mask3_fmadd_ps(a, b, c, 0b00001111);
43242 let e = _mm_set_ps(1., 2., 3., 4.);
43243 assert_eq_m128(r, e);
43244 }
43245
43246 #[simd_test(enable = "avx512f")]
43247 unsafe fn test_mm512_fmsub_ps() {
        let a = _mm512_set1_ps(1.);
43251 let b = _mm512_setr_ps(
43252 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
43253 );
        let c = _mm512_set1_ps(1.);
43257 let r = _mm512_fmsub_ps(a, b, c);
43258 let e = _mm512_setr_ps(
43259 -1., 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14.,
43260 );
43261 assert_eq_m512(r, e);
43262 }
43263
43264 #[simd_test(enable = "avx512f")]
43265 unsafe fn test_mm512_mask_fmsub_ps() {
43266 let a = _mm512_set1_ps(1.);
43267 let b = _mm512_setr_ps(
43268 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
43269 );
43270 let c = _mm512_set1_ps(1.);
43271 let r = _mm512_mask_fmsub_ps(a, 0, b, c);
43272 assert_eq_m512(r, a);
43273 let r = _mm512_mask_fmsub_ps(a, 0b00000000_11111111, b, c);
43274 let e = _mm512_setr_ps(
43275 -1., 0., 1., 2., 3., 4., 5., 6., 1., 1., 1., 1., 1., 1., 1., 1.,
43276 );
43277 assert_eq_m512(r, e);
43278 }
43279
43280 #[simd_test(enable = "avx512f")]
43281 unsafe fn test_mm512_maskz_fmsub_ps() {
43282 let a = _mm512_set1_ps(1.);
43283 let b = _mm512_setr_ps(
43284 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
43285 );
43286 let c = _mm512_set1_ps(1.);
43287 let r = _mm512_maskz_fmsub_ps(0, a, b, c);
43288 assert_eq_m512(r, _mm512_setzero_ps());
43289 let r = _mm512_maskz_fmsub_ps(0b00000000_11111111, a, b, c);
43290 let e = _mm512_setr_ps(
43291 -1., 0., 1., 2., 3., 4., 5., 6., 0., 0., 0., 0., 0., 0., 0., 0.,
43292 );
43293 assert_eq_m512(r, e);
43294 }
43295
43296 #[simd_test(enable = "avx512f")]
43297 unsafe fn test_mm512_mask3_fmsub_ps() {
43298 let a = _mm512_set1_ps(1.);
43299 let b = _mm512_setr_ps(
43300 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
43301 );
43302 let c = _mm512_setr_ps(
43303 1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
43304 );
43305 let r = _mm512_mask3_fmsub_ps(a, b, c, 0);
43306 assert_eq_m512(r, c);
43307 let r = _mm512_mask3_fmsub_ps(a, b, c, 0b00000000_11111111);
43308 let e = _mm512_setr_ps(
43309 -1., 0., 1., 2., 3., 4., 5., 6., 2., 2., 2., 2., 2., 2., 2., 2.,
43310 );
43311 assert_eq_m512(r, e);
43312 }
43313
43314 #[simd_test(enable = "avx512f,avx512vl")]
43315 unsafe fn test_mm256_mask_fmsub_ps() {
43316 let a = _mm256_set1_ps(1.);
43317 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
43318 let c = _mm256_set1_ps(1.);
43319 let r = _mm256_mask_fmsub_ps(a, 0, b, c);
43320 assert_eq_m256(r, a);
43321 let r = _mm256_mask_fmsub_ps(a, 0b11111111, b, c);
43322 let e = _mm256_set_ps(-1., 0., 1., 2., 3., 4., 5., 6.);
43323 assert_eq_m256(r, e);
43324 }
43325
43326 #[simd_test(enable = "avx512f,avx512vl")]
43327 unsafe fn test_mm256_maskz_fmsub_ps() {
43328 let a = _mm256_set1_ps(1.);
43329 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
43330 let c = _mm256_set1_ps(1.);
43331 let r = _mm256_maskz_fmsub_ps(0, a, b, c);
43332 assert_eq_m256(r, _mm256_setzero_ps());
43333 let r = _mm256_maskz_fmsub_ps(0b11111111, a, b, c);
43334 let e = _mm256_set_ps(-1., 0., 1., 2., 3., 4., 5., 6.);
43335 assert_eq_m256(r, e);
43336 }
43337
43338 #[simd_test(enable = "avx512f,avx512vl")]
43339 unsafe fn test_mm256_mask3_fmsub_ps() {
43340 let a = _mm256_set1_ps(1.);
43341 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
43342 let c = _mm256_set1_ps(1.);
43343 let r = _mm256_mask3_fmsub_ps(a, b, c, 0);
43344 assert_eq_m256(r, c);
43345 let r = _mm256_mask3_fmsub_ps(a, b, c, 0b11111111);
43346 let e = _mm256_set_ps(-1., 0., 1., 2., 3., 4., 5., 6.);
43347 assert_eq_m256(r, e);
43348 }
43349
43350 #[simd_test(enable = "avx512f,avx512vl")]
43351 unsafe fn test_mm_mask_fmsub_ps() {
43352 let a = _mm_set1_ps(1.);
43353 let b = _mm_set_ps(0., 1., 2., 3.);
43354 let c = _mm_set1_ps(1.);
43355 let r = _mm_mask_fmsub_ps(a, 0, b, c);
43356 assert_eq_m128(r, a);
43357 let r = _mm_mask_fmsub_ps(a, 0b00001111, b, c);
43358 let e = _mm_set_ps(-1., 0., 1., 2.);
43359 assert_eq_m128(r, e);
43360 }
43361
43362 #[simd_test(enable = "avx512f,avx512vl")]
43363 unsafe fn test_mm_maskz_fmsub_ps() {
43364 let a = _mm_set1_ps(1.);
43365 let b = _mm_set_ps(0., 1., 2., 3.);
43366 let c = _mm_set1_ps(1.);
43367 let r = _mm_maskz_fmsub_ps(0, a, b, c);
43368 assert_eq_m128(r, _mm_setzero_ps());
43369 let r = _mm_maskz_fmsub_ps(0b00001111, a, b, c);
43370 let e = _mm_set_ps(-1., 0., 1., 2.);
43371 assert_eq_m128(r, e);
43372 }
43373
43374 #[simd_test(enable = "avx512f,avx512vl")]
43375 unsafe fn test_mm_mask3_fmsub_ps() {
43376 let a = _mm_set1_ps(1.);
43377 let b = _mm_set_ps(0., 1., 2., 3.);
43378 let c = _mm_set1_ps(1.);
43379 let r = _mm_mask3_fmsub_ps(a, b, c, 0);
43380 assert_eq_m128(r, c);
43381 let r = _mm_mask3_fmsub_ps(a, b, c, 0b00001111);
43382 let e = _mm_set_ps(-1., 0., 1., 2.);
43383 assert_eq_m128(r, e);
43384 }
43385
43386 #[simd_test(enable = "avx512f")]
43387 unsafe fn test_mm512_fmaddsub_ps() {
43388 let a = _mm512_set1_ps(1.);
43389 let b = _mm512_setr_ps(
43390 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
43391 );
43392 let c = _mm512_set1_ps(1.);
43393 let r = _mm512_fmaddsub_ps(a, b, c);
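        // fmaddsub subtracts `c` in even-indexed elements and adds it in odd-indexed
        // elements, producing the alternating -1., 2., 1., 4., ... pattern below.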
43394 let e = _mm512_setr_ps(
43395 -1., 2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16.,
43396 );
43397 assert_eq_m512(r, e);
43398 }
43399
43400 #[simd_test(enable = "avx512f")]
43401 unsafe fn test_mm512_mask_fmaddsub_ps() {
43402 let a = _mm512_set1_ps(1.);
43403 let b = _mm512_setr_ps(
43404 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
43405 );
43406 let c = _mm512_set1_ps(1.);
43407 let r = _mm512_mask_fmaddsub_ps(a, 0, b, c);
43408 assert_eq_m512(r, a);
43409 let r = _mm512_mask_fmaddsub_ps(a, 0b00000000_11111111, b, c);
43410 let e = _mm512_setr_ps(
43411 -1., 2., 1., 4., 3., 6., 5., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
43412 );
43413 assert_eq_m512(r, e);
43414 }
43415
43416 #[simd_test(enable = "avx512f")]
43417 unsafe fn test_mm512_maskz_fmaddsub_ps() {
43418 let a = _mm512_set1_ps(1.);
43419 let b = _mm512_setr_ps(
43420 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
43421 );
43422 let c = _mm512_set1_ps(1.);
43423 let r = _mm512_maskz_fmaddsub_ps(0, a, b, c);
43424 assert_eq_m512(r, _mm512_setzero_ps());
43425 let r = _mm512_maskz_fmaddsub_ps(0b00000000_11111111, a, b, c);
43426 let e = _mm512_setr_ps(
43427 -1., 2., 1., 4., 3., 6., 5., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
43428 );
43429 assert_eq_m512(r, e);
43430 }
43431
43432 #[simd_test(enable = "avx512f")]
43433 unsafe fn test_mm512_mask3_fmaddsub_ps() {
43434 let a = _mm512_set1_ps(1.);
43435 let b = _mm512_setr_ps(
43436 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
43437 );
43438 let c = _mm512_setr_ps(
43439 1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
43440 );
43441 let r = _mm512_mask3_fmaddsub_ps(a, b, c, 0);
43442 assert_eq_m512(r, c);
43443 let r = _mm512_mask3_fmaddsub_ps(a, b, c, 0b00000000_11111111);
43444 let e = _mm512_setr_ps(
43445 -1., 2., 1., 4., 3., 6., 5., 8., 2., 2., 2., 2., 2., 2., 2., 2.,
43446 );
43447 assert_eq_m512(r, e);
43448 }
43449
43450 #[simd_test(enable = "avx512f,avx512vl")]
43451 unsafe fn test_mm256_mask_fmaddsub_ps() {
43452 let a = _mm256_set1_ps(1.);
43453 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
43454 let c = _mm256_set1_ps(1.);
43455 let r = _mm256_mask_fmaddsub_ps(a, 0, b, c);
43456 assert_eq_m256(r, a);
43457 let r = _mm256_mask_fmaddsub_ps(a, 0b11111111, b, c);
43458 let e = _mm256_set_ps(1., 0., 3., 2., 5., 4., 7., 6.);
43459 assert_eq_m256(r, e);
43460 }
43461
43462 #[simd_test(enable = "avx512f,avx512vl")]
43463 unsafe fn test_mm256_maskz_fmaddsub_ps() {
43464 let a = _mm256_set1_ps(1.);
43465 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
43466 let c = _mm256_set1_ps(1.);
43467 let r = _mm256_maskz_fmaddsub_ps(0, a, b, c);
43468 assert_eq_m256(r, _mm256_setzero_ps());
43469 let r = _mm256_maskz_fmaddsub_ps(0b11111111, a, b, c);
43470 let e = _mm256_set_ps(1., 0., 3., 2., 5., 4., 7., 6.);
43471 assert_eq_m256(r, e);
43472 }
43473
43474 #[simd_test(enable = "avx512f,avx512vl")]
43475 unsafe fn test_mm256_mask3_fmaddsub_ps() {
43476 let a = _mm256_set1_ps(1.);
43477 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
43478 let c = _mm256_set1_ps(1.);
43479 let r = _mm256_mask3_fmaddsub_ps(a, b, c, 0);
43480 assert_eq_m256(r, c);
43481 let r = _mm256_mask3_fmaddsub_ps(a, b, c, 0b11111111);
43482 let e = _mm256_set_ps(1., 0., 3., 2., 5., 4., 7., 6.);
43483 assert_eq_m256(r, e);
43484 }
43485
43486 #[simd_test(enable = "avx512f,avx512vl")]
43487 unsafe fn test_mm_mask_fmaddsub_ps() {
43488 let a = _mm_set1_ps(1.);
43489 let b = _mm_set_ps(0., 1., 2., 3.);
43490 let c = _mm_set1_ps(1.);
43491 let r = _mm_mask_fmaddsub_ps(a, 0, b, c);
43492 assert_eq_m128(r, a);
43493 let r = _mm_mask_fmaddsub_ps(a, 0b00001111, b, c);
43494 let e = _mm_set_ps(1., 0., 3., 2.);
43495 assert_eq_m128(r, e);
43496 }
43497
43498 #[simd_test(enable = "avx512f,avx512vl")]
43499 unsafe fn test_mm_maskz_fmaddsub_ps() {
43500 let a = _mm_set1_ps(1.);
43501 let b = _mm_set_ps(0., 1., 2., 3.);
43502 let c = _mm_set1_ps(1.);
43503 let r = _mm_maskz_fmaddsub_ps(0, a, b, c);
43504 assert_eq_m128(r, _mm_setzero_ps());
43505 let r = _mm_maskz_fmaddsub_ps(0b00001111, a, b, c);
43506 let e = _mm_set_ps(1., 0., 3., 2.);
43507 assert_eq_m128(r, e);
43508 }
43509
43510 #[simd_test(enable = "avx512f,avx512vl")]
43511 unsafe fn test_mm_mask3_fmaddsub_ps() {
43512 let a = _mm_set1_ps(1.);
43513 let b = _mm_set_ps(0., 1., 2., 3.);
43514 let c = _mm_set1_ps(1.);
43515 let r = _mm_mask3_fmaddsub_ps(a, b, c, 0);
43516 assert_eq_m128(r, c);
43517 let r = _mm_mask3_fmaddsub_ps(a, b, c, 0b00001111);
43518 let e = _mm_set_ps(1., 0., 3., 2.);
43519 assert_eq_m128(r, e);
43520 }
43521
43522 #[simd_test(enable = "avx512f")]
43523 unsafe fn test_mm512_fmsubadd_ps() {
        let a = _mm512_set1_ps(1.);
43527 let b = _mm512_setr_ps(
43528 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
43529 );
        let c = _mm512_set1_ps(1.);
43533 let r = _mm512_fmsubadd_ps(a, b, c);
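        // fmsubadd mirrors fmaddsub: `c` is added in even-indexed elements and
        // subtracted in odd-indexed elements.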
43534 let e = _mm512_setr_ps(
43535 1., 0., 3., 2., 5., 4., 7., 6., 9., 8., 11., 10., 13., 12., 15., 14.,
43536 );
43537 assert_eq_m512(r, e);
43538 }
43539
43540 #[simd_test(enable = "avx512f")]
43541 unsafe fn test_mm512_mask_fmsubadd_ps() {
43542 let a = _mm512_set1_ps(1.);
43543 let b = _mm512_setr_ps(
43544 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
43545 );
43546 let c = _mm512_set1_ps(1.);
43547 let r = _mm512_mask_fmsubadd_ps(a, 0, b, c);
43548 assert_eq_m512(r, a);
43549 let r = _mm512_mask_fmsubadd_ps(a, 0b00000000_11111111, b, c);
43550 let e = _mm512_setr_ps(
43551 1., 0., 3., 2., 5., 4., 7., 6., 1., 1., 1., 1., 1., 1., 1., 1.,
43552 );
43553 assert_eq_m512(r, e);
43554 }
43555
43556 #[simd_test(enable = "avx512f")]
43557 unsafe fn test_mm512_maskz_fmsubadd_ps() {
43558 let a = _mm512_set1_ps(1.);
43559 let b = _mm512_setr_ps(
43560 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
43561 );
43562 let c = _mm512_set1_ps(1.);
43563 let r = _mm512_maskz_fmsubadd_ps(0, a, b, c);
43564 assert_eq_m512(r, _mm512_setzero_ps());
43565 let r = _mm512_maskz_fmsubadd_ps(0b00000000_11111111, a, b, c);
43566 let e = _mm512_setr_ps(
43567 1., 0., 3., 2., 5., 4., 7., 6., 0., 0., 0., 0., 0., 0., 0., 0.,
43568 );
43569 assert_eq_m512(r, e);
43570 }
43571
43572 #[simd_test(enable = "avx512f")]
43573 unsafe fn test_mm512_mask3_fmsubadd_ps() {
43574 let a = _mm512_set1_ps(1.);
43575 let b = _mm512_setr_ps(
43576 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
43577 );
43578 let c = _mm512_setr_ps(
43579 1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
43580 );
43581 let r = _mm512_mask3_fmsubadd_ps(a, b, c, 0);
43582 assert_eq_m512(r, c);
43583 let r = _mm512_mask3_fmsubadd_ps(a, b, c, 0b00000000_11111111);
43584 let e = _mm512_setr_ps(
43585 1., 0., 3., 2., 5., 4., 7., 6., 2., 2., 2., 2., 2., 2., 2., 2.,
43586 );
43587 assert_eq_m512(r, e);
43588 }
43589
43590 #[simd_test(enable = "avx512f,avx512vl")]
43591 unsafe fn test_mm256_mask_fmsubadd_ps() {
43592 let a = _mm256_set1_ps(1.);
43593 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
43594 let c = _mm256_set1_ps(1.);
43595 let r = _mm256_mask_fmsubadd_ps(a, 0, b, c);
43596 assert_eq_m256(r, a);
43597 let r = _mm256_mask_fmsubadd_ps(a, 0b11111111, b, c);
43598 let e = _mm256_set_ps(-1., 2., 1., 4., 3., 6., 5., 8.);
43599 assert_eq_m256(r, e);
43600 }
43601
43602 #[simd_test(enable = "avx512f,avx512vl")]
43603 unsafe fn test_mm256_maskz_fmsubadd_ps() {
43604 let a = _mm256_set1_ps(1.);
43605 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
43606 let c = _mm256_set1_ps(1.);
43607 let r = _mm256_maskz_fmsubadd_ps(0, a, b, c);
43608 assert_eq_m256(r, _mm256_setzero_ps());
43609 let r = _mm256_maskz_fmsubadd_ps(0b11111111, a, b, c);
43610 let e = _mm256_set_ps(-1., 2., 1., 4., 3., 6., 5., 8.);
43611 assert_eq_m256(r, e);
43612 }
43613
43614 #[simd_test(enable = "avx512f,avx512vl")]
43615 unsafe fn test_mm256_mask3_fmsubadd_ps() {
43616 let a = _mm256_set1_ps(1.);
43617 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
43618 let c = _mm256_set1_ps(1.);
43619 let r = _mm256_mask3_fmsubadd_ps(a, b, c, 0);
43620 assert_eq_m256(r, c);
43621 let r = _mm256_mask3_fmsubadd_ps(a, b, c, 0b11111111);
43622 let e = _mm256_set_ps(-1., 2., 1., 4., 3., 6., 5., 8.);
43623 assert_eq_m256(r, e);
43624 }
43625
43626 #[simd_test(enable = "avx512f,avx512vl")]
43627 unsafe fn test_mm_mask_fmsubadd_ps() {
43628 let a = _mm_set1_ps(1.);
43629 let b = _mm_set_ps(0., 1., 2., 3.);
43630 let c = _mm_set1_ps(1.);
43631 let r = _mm_mask_fmsubadd_ps(a, 0, b, c);
43632 assert_eq_m128(r, a);
43633 let r = _mm_mask_fmsubadd_ps(a, 0b00001111, b, c);
43634 let e = _mm_set_ps(-1., 2., 1., 4.);
43635 assert_eq_m128(r, e);
43636 }
43637
43638 #[simd_test(enable = "avx512f,avx512vl")]
43639 unsafe fn test_mm_maskz_fmsubadd_ps() {
43640 let a = _mm_set1_ps(1.);
43641 let b = _mm_set_ps(0., 1., 2., 3.);
43642 let c = _mm_set1_ps(1.);
43643 let r = _mm_maskz_fmsubadd_ps(0, a, b, c);
43644 assert_eq_m128(r, _mm_setzero_ps());
43645 let r = _mm_maskz_fmsubadd_ps(0b00001111, a, b, c);
43646 let e = _mm_set_ps(-1., 2., 1., 4.);
43647 assert_eq_m128(r, e);
43648 }
43649
43650 #[simd_test(enable = "avx512f,avx512vl")]
43651 unsafe fn test_mm_mask3_fmsubadd_ps() {
43652 let a = _mm_set1_ps(1.);
43653 let b = _mm_set_ps(0., 1., 2., 3.);
43654 let c = _mm_set1_ps(1.);
43655 let r = _mm_mask3_fmsubadd_ps(a, b, c, 0);
43656 assert_eq_m128(r, c);
43657 let r = _mm_mask3_fmsubadd_ps(a, b, c, 0b00001111);
43658 let e = _mm_set_ps(-1., 2., 1., 4.);
43659 assert_eq_m128(r, e);
43660 }
43661
43662 #[simd_test(enable = "avx512f")]
43663 unsafe fn test_mm512_fnmadd_ps() {
43664 let a = _mm512_set1_ps(1.);
43665 let b = _mm512_setr_ps(
43666 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
43667 );
43668 let c = _mm512_set1_ps(1.);
43669 let r = _mm512_fnmadd_ps(a, b, c);
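        // fnmadd computes -(a * b) + c, so the results count down from 1.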
43670 let e = _mm512_setr_ps(
43671 1., 0., -1., -2., -3., -4., -5., -6., -7., -8., -9., -10., -11., -12., -13., -14.,
43672 );
43673 assert_eq_m512(r, e);
43674 }
43675
43676 #[simd_test(enable = "avx512f")]
43677 unsafe fn test_mm512_mask_fnmadd_ps() {
43678 let a = _mm512_set1_ps(1.);
43679 let b = _mm512_setr_ps(
43680 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
43681 );
43682 let c = _mm512_set1_ps(1.);
43683 let r = _mm512_mask_fnmadd_ps(a, 0, b, c);
43684 assert_eq_m512(r, a);
43685 let r = _mm512_mask_fnmadd_ps(a, 0b00000000_11111111, b, c);
43686 let e = _mm512_setr_ps(
43687 1., 0., -1., -2., -3., -4., -5., -6., 1., 1., 1., 1., 1., 1., 1., 1.,
43688 );
43689 assert_eq_m512(r, e);
43690 }
43691
43692 #[simd_test(enable = "avx512f")]
43693 unsafe fn test_mm512_maskz_fnmadd_ps() {
43694 let a = _mm512_set1_ps(1.);
43695 let b = _mm512_setr_ps(
43696 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
43697 );
43698 let c = _mm512_set1_ps(1.);
43699 let r = _mm512_maskz_fnmadd_ps(0, a, b, c);
43700 assert_eq_m512(r, _mm512_setzero_ps());
43701 let r = _mm512_maskz_fnmadd_ps(0b00000000_11111111, a, b, c);
43702 let e = _mm512_setr_ps(
43703 1., 0., -1., -2., -3., -4., -5., -6., 0., 0., 0., 0., 0., 0., 0., 0.,
43704 );
43705 assert_eq_m512(r, e);
43706 }
43707
43708 #[simd_test(enable = "avx512f")]
43709 unsafe fn test_mm512_mask3_fnmadd_ps() {
43710 let a = _mm512_set1_ps(1.);
43711 let b = _mm512_setr_ps(
43712 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
43713 );
43714 let c = _mm512_setr_ps(
43715 1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
43716 );
43717 let r = _mm512_mask3_fnmadd_ps(a, b, c, 0);
43718 assert_eq_m512(r, c);
43719 let r = _mm512_mask3_fnmadd_ps(a, b, c, 0b00000000_11111111);
43720 let e = _mm512_setr_ps(
43721 1., 0., -1., -2., -3., -4., -5., -6., 2., 2., 2., 2., 2., 2., 2., 2.,
43722 );
43723 assert_eq_m512(r, e);
43724 }
43725
43726 #[simd_test(enable = "avx512f,avx512vl")]
43727 unsafe fn test_mm256_mask_fnmadd_ps() {
43728 let a = _mm256_set1_ps(1.);
43729 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
43730 let c = _mm256_set1_ps(1.);
43731 let r = _mm256_mask_fnmadd_ps(a, 0, b, c);
43732 assert_eq_m256(r, a);
43733 let r = _mm256_mask_fnmadd_ps(a, 0b11111111, b, c);
43734 let e = _mm256_set_ps(1., 0., -1., -2., -3., -4., -5., -6.);
43735 assert_eq_m256(r, e);
43736 }
43737
43738 #[simd_test(enable = "avx512f,avx512vl")]
43739 unsafe fn test_mm256_maskz_fnmadd_ps() {
43740 let a = _mm256_set1_ps(1.);
43741 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
43742 let c = _mm256_set1_ps(1.);
43743 let r = _mm256_maskz_fnmadd_ps(0, a, b, c);
43744 assert_eq_m256(r, _mm256_setzero_ps());
43745 let r = _mm256_maskz_fnmadd_ps(0b11111111, a, b, c);
43746 let e = _mm256_set_ps(1., 0., -1., -2., -3., -4., -5., -6.);
43747 assert_eq_m256(r, e);
43748 }
43749
43750 #[simd_test(enable = "avx512f,avx512vl")]
43751 unsafe fn test_mm256_mask3_fnmadd_ps() {
43752 let a = _mm256_set1_ps(1.);
43753 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
43754 let c = _mm256_set1_ps(1.);
43755 let r = _mm256_mask3_fnmadd_ps(a, b, c, 0);
43756 assert_eq_m256(r, c);
43757 let r = _mm256_mask3_fnmadd_ps(a, b, c, 0b11111111);
43758 let e = _mm256_set_ps(1., 0., -1., -2., -3., -4., -5., -6.);
43759 assert_eq_m256(r, e);
43760 }
43761
43762 #[simd_test(enable = "avx512f,avx512vl")]
43763 unsafe fn test_mm_mask_fnmadd_ps() {
43764 let a = _mm_set1_ps(1.);
43765 let b = _mm_set_ps(0., 1., 2., 3.);
43766 let c = _mm_set1_ps(1.);
43767 let r = _mm_mask_fnmadd_ps(a, 0, b, c);
43768 assert_eq_m128(r, a);
43769 let r = _mm_mask_fnmadd_ps(a, 0b00001111, b, c);
43770 let e = _mm_set_ps(1., 0., -1., -2.);
43771 assert_eq_m128(r, e);
43772 }
43773
43774 #[simd_test(enable = "avx512f,avx512vl")]
43775 unsafe fn test_mm_maskz_fnmadd_ps() {
43776 let a = _mm_set1_ps(1.);
43777 let b = _mm_set_ps(0., 1., 2., 3.);
43778 let c = _mm_set1_ps(1.);
43779 let r = _mm_maskz_fnmadd_ps(0, a, b, c);
43780 assert_eq_m128(r, _mm_setzero_ps());
43781 let r = _mm_maskz_fnmadd_ps(0b00001111, a, b, c);
43782 let e = _mm_set_ps(1., 0., -1., -2.);
43783 assert_eq_m128(r, e);
43784 }
43785
43786 #[simd_test(enable = "avx512f,avx512vl")]
43787 unsafe fn test_mm_mask3_fnmadd_ps() {
43788 let a = _mm_set1_ps(1.);
43789 let b = _mm_set_ps(0., 1., 2., 3.);
43790 let c = _mm_set1_ps(1.);
43791 let r = _mm_mask3_fnmadd_ps(a, b, c, 0);
43792 assert_eq_m128(r, c);
43793 let r = _mm_mask3_fnmadd_ps(a, b, c, 0b00001111);
43794 let e = _mm_set_ps(1., 0., -1., -2.);
43795 assert_eq_m128(r, e);
43796 }
43797
43798 #[simd_test(enable = "avx512f")]
43799 unsafe fn test_mm512_fnmsub_ps() {
43800 let a = _mm512_set1_ps(1.);
43801 let b = _mm512_setr_ps(
43802 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
43803 );
43804 let c = _mm512_set1_ps(1.);
43805 let r = _mm512_fnmsub_ps(a, b, c);
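        // fnmsub computes -(a * b) - c, so every element here is negative.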
43806 let e = _mm512_setr_ps(
43807 -1., -2., -3., -4., -5., -6., -7., -8., -9., -10., -11., -12., -13., -14., -15., -16.,
43808 );
43809 assert_eq_m512(r, e);
43810 }
43811
43812 #[simd_test(enable = "avx512f")]
43813 unsafe fn test_mm512_mask_fnmsub_ps() {
43814 let a = _mm512_set1_ps(1.);
43815 let b = _mm512_setr_ps(
43816 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
43817 );
43818 let c = _mm512_set1_ps(1.);
43819 let r = _mm512_mask_fnmsub_ps(a, 0, b, c);
43820 assert_eq_m512(r, a);
43821 let r = _mm512_mask_fnmsub_ps(a, 0b00000000_11111111, b, c);
43822 let e = _mm512_setr_ps(
43823 -1., -2., -3., -4., -5., -6., -7., -8., 1., 1., 1., 1., 1., 1., 1., 1.,
43824 );
43825 assert_eq_m512(r, e);
43826 }
43827
43828 #[simd_test(enable = "avx512f")]
43829 unsafe fn test_mm512_maskz_fnmsub_ps() {
43830 let a = _mm512_set1_ps(1.);
43831 let b = _mm512_setr_ps(
43832 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
43833 );
43834 let c = _mm512_set1_ps(1.);
43835 let r = _mm512_maskz_fnmsub_ps(0, a, b, c);
43836 assert_eq_m512(r, _mm512_setzero_ps());
43837 let r = _mm512_maskz_fnmsub_ps(0b00000000_11111111, a, b, c);
43838 let e = _mm512_setr_ps(
43839 -1., -2., -3., -4., -5., -6., -7., -8., 0., 0., 0., 0., 0., 0., 0., 0.,
43840 );
43841 assert_eq_m512(r, e);
43842 }
43843
43844 #[simd_test(enable = "avx512f")]
43845 unsafe fn test_mm512_mask3_fnmsub_ps() {
43846 let a = _mm512_set1_ps(1.);
43847 let b = _mm512_setr_ps(
43848 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
43849 );
43850 let c = _mm512_setr_ps(
43851 1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
43852 );
43853 let r = _mm512_mask3_fnmsub_ps(a, b, c, 0);
43854 assert_eq_m512(r, c);
43855 let r = _mm512_mask3_fnmsub_ps(a, b, c, 0b00000000_11111111);
43856 let e = _mm512_setr_ps(
43857 -1., -2., -3., -4., -5., -6., -7., -8., 2., 2., 2., 2., 2., 2., 2., 2.,
43858 );
43859 assert_eq_m512(r, e);
43860 }
43861
43862 #[simd_test(enable = "avx512f,avx512vl")]
43863 unsafe fn test_mm256_mask_fnmsub_ps() {
43864 let a = _mm256_set1_ps(1.);
43865 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
43866 let c = _mm256_set1_ps(1.);
43867 let r = _mm256_mask_fnmsub_ps(a, 0, b, c);
43868 assert_eq_m256(r, a);
43869 let r = _mm256_mask_fnmsub_ps(a, 0b11111111, b, c);
43870 let e = _mm256_set_ps(-1., -2., -3., -4., -5., -6., -7., -8.);
43871 assert_eq_m256(r, e);
43872 }
43873
43874 #[simd_test(enable = "avx512f,avx512vl")]
43875 unsafe fn test_mm256_maskz_fnmsub_ps() {
43876 let a = _mm256_set1_ps(1.);
43877 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
43878 let c = _mm256_set1_ps(1.);
43879 let r = _mm256_maskz_fnmsub_ps(0, a, b, c);
43880 assert_eq_m256(r, _mm256_setzero_ps());
43881 let r = _mm256_maskz_fnmsub_ps(0b11111111, a, b, c);
43882 let e = _mm256_set_ps(-1., -2., -3., -4., -5., -6., -7., -8.);
43883 assert_eq_m256(r, e);
43884 }
43885
43886 #[simd_test(enable = "avx512f,avx512vl")]
43887 unsafe fn test_mm256_mask3_fnmsub_ps() {
43888 let a = _mm256_set1_ps(1.);
43889 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
43890 let c = _mm256_set1_ps(1.);
43891 let r = _mm256_mask3_fnmsub_ps(a, b, c, 0);
43892 assert_eq_m256(r, c);
43893 let r = _mm256_mask3_fnmsub_ps(a, b, c, 0b11111111);
43894 let e = _mm256_set_ps(-1., -2., -3., -4., -5., -6., -7., -8.);
43895 assert_eq_m256(r, e);
43896 }
43897
43898 #[simd_test(enable = "avx512f,avx512vl")]
43899 unsafe fn test_mm_mask_fnmsub_ps() {
43900 let a = _mm_set1_ps(1.);
43901 let b = _mm_set_ps(0., 1., 2., 3.);
43902 let c = _mm_set1_ps(1.);
43903 let r = _mm_mask_fnmsub_ps(a, 0, b, c);
43904 assert_eq_m128(r, a);
43905 let r = _mm_mask_fnmsub_ps(a, 0b00001111, b, c);
43906 let e = _mm_set_ps(-1., -2., -3., -4.);
43907 assert_eq_m128(r, e);
43908 }
43909
43910 #[simd_test(enable = "avx512f,avx512vl")]
43911 unsafe fn test_mm_maskz_fnmsub_ps() {
43912 let a = _mm_set1_ps(1.);
43913 let b = _mm_set_ps(0., 1., 2., 3.);
43914 let c = _mm_set1_ps(1.);
43915 let r = _mm_maskz_fnmsub_ps(0, a, b, c);
43916 assert_eq_m128(r, _mm_setzero_ps());
43917 let r = _mm_maskz_fnmsub_ps(0b00001111, a, b, c);
43918 let e = _mm_set_ps(-1., -2., -3., -4.);
43919 assert_eq_m128(r, e);
43920 }
43921
43922 #[simd_test(enable = "avx512f,avx512vl")]
43923 unsafe fn test_mm_mask3_fnmsub_ps() {
43924 let a = _mm_set1_ps(1.);
43925 let b = _mm_set_ps(0., 1., 2., 3.);
43926 let c = _mm_set1_ps(1.);
43927 let r = _mm_mask3_fnmsub_ps(a, b, c, 0);
43928 assert_eq_m128(r, c);
43929 let r = _mm_mask3_fnmsub_ps(a, b, c, 0b00001111);
43930 let e = _mm_set_ps(-1., -2., -3., -4.);
43931 assert_eq_m128(r, e);
43932 }
43933
43934 #[simd_test(enable = "avx512f")]
43935 unsafe fn test_mm512_rcp14_ps() {
43936 let a = _mm512_set1_ps(3.);
43937 let r = _mm512_rcp14_ps(a);
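        // vrcp14ps approximates 1/x with a relative error of at most 2^-14, hence
        // the expected value 0.33333206 rather than an exact 1/3.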
43938 let e = _mm512_set1_ps(0.33333206);
43939 assert_eq_m512(r, e);
43940 }
43941
43942 #[simd_test(enable = "avx512f")]
43943 unsafe fn test_mm512_mask_rcp14_ps() {
43944 let a = _mm512_set1_ps(3.);
43945 let r = _mm512_mask_rcp14_ps(a, 0, a);
43946 assert_eq_m512(r, a);
43947 let r = _mm512_mask_rcp14_ps(a, 0b11111111_00000000, a);
43948 let e = _mm512_setr_ps(
43949 3., 3., 3., 3., 3., 3., 3., 3., 0.33333206, 0.33333206, 0.33333206, 0.33333206,
43950 0.33333206, 0.33333206, 0.33333206, 0.33333206,
43951 );
43952 assert_eq_m512(r, e);
43953 }
43954
43955 #[simd_test(enable = "avx512f")]
43956 unsafe fn test_mm512_maskz_rcp14_ps() {
43957 let a = _mm512_set1_ps(3.);
43958 let r = _mm512_maskz_rcp14_ps(0, a);
43959 assert_eq_m512(r, _mm512_setzero_ps());
43960 let r = _mm512_maskz_rcp14_ps(0b11111111_00000000, a);
43961 let e = _mm512_setr_ps(
43962 0., 0., 0., 0., 0., 0., 0., 0., 0.33333206, 0.33333206, 0.33333206, 0.33333206,
43963 0.33333206, 0.33333206, 0.33333206, 0.33333206,
43964 );
43965 assert_eq_m512(r, e);
43966 }
43967
43968 #[simd_test(enable = "avx512f,avx512vl")]
43969 unsafe fn test_mm256_rcp14_ps() {
43970 let a = _mm256_set1_ps(3.);
43971 let r = _mm256_rcp14_ps(a);
43972 let e = _mm256_set1_ps(0.33333206);
43973 assert_eq_m256(r, e);
43974 }
43975
43976 #[simd_test(enable = "avx512f,avx512vl")]
43977 unsafe fn test_mm256_mask_rcp14_ps() {
43978 let a = _mm256_set1_ps(3.);
43979 let r = _mm256_mask_rcp14_ps(a, 0, a);
43980 assert_eq_m256(r, a);
43981 let r = _mm256_mask_rcp14_ps(a, 0b11111111, a);
43982 let e = _mm256_set1_ps(0.33333206);
43983 assert_eq_m256(r, e);
43984 }
43985
43986 #[simd_test(enable = "avx512f,avx512vl")]
43987 unsafe fn test_mm256_maskz_rcp14_ps() {
43988 let a = _mm256_set1_ps(3.);
43989 let r = _mm256_maskz_rcp14_ps(0, a);
43990 assert_eq_m256(r, _mm256_setzero_ps());
43991 let r = _mm256_maskz_rcp14_ps(0b11111111, a);
43992 let e = _mm256_set1_ps(0.33333206);
43993 assert_eq_m256(r, e);
43994 }
43995
43996 #[simd_test(enable = "avx512f,avx512vl")]
43997 unsafe fn test_mm_rcp14_ps() {
43998 let a = _mm_set1_ps(3.);
43999 let r = _mm_rcp14_ps(a);
44000 let e = _mm_set1_ps(0.33333206);
44001 assert_eq_m128(r, e);
44002 }
44003
44004 #[simd_test(enable = "avx512f,avx512vl")]
44005 unsafe fn test_mm_mask_rcp14_ps() {
44006 let a = _mm_set1_ps(3.);
44007 let r = _mm_mask_rcp14_ps(a, 0, a);
44008 assert_eq_m128(r, a);
44009 let r = _mm_mask_rcp14_ps(a, 0b00001111, a);
44010 let e = _mm_set1_ps(0.33333206);
44011 assert_eq_m128(r, e);
44012 }
44013
44014 #[simd_test(enable = "avx512f,avx512vl")]
44015 unsafe fn test_mm_maskz_rcp14_ps() {
44016 let a = _mm_set1_ps(3.);
44017 let r = _mm_maskz_rcp14_ps(0, a);
44018 assert_eq_m128(r, _mm_setzero_ps());
44019 let r = _mm_maskz_rcp14_ps(0b00001111, a);
44020 let e = _mm_set1_ps(0.33333206);
44021 assert_eq_m128(r, e);
44022 }
44023
44024 #[simd_test(enable = "avx512f")]
44025 unsafe fn test_mm512_rsqrt14_ps() {
44026 let a = _mm512_set1_ps(3.);
44027 let r = _mm512_rsqrt14_ps(a);
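        // vrsqrt14ps approximates 1/sqrt(x) to within a relative error of 2^-14;
        // 1/sqrt(3) ~= 0.57735, and the hardware approximation is 0.5773392.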
44028 let e = _mm512_set1_ps(0.5773392);
44029 assert_eq_m512(r, e);
44030 }
44031
44032 #[simd_test(enable = "avx512f")]
44033 unsafe fn test_mm512_mask_rsqrt14_ps() {
44034 let a = _mm512_set1_ps(3.);
44035 let r = _mm512_mask_rsqrt14_ps(a, 0, a);
44036 assert_eq_m512(r, a);
44037 let r = _mm512_mask_rsqrt14_ps(a, 0b11111111_00000000, a);
44038 let e = _mm512_setr_ps(
44039 3., 3., 3., 3., 3., 3., 3., 3., 0.5773392, 0.5773392, 0.5773392, 0.5773392, 0.5773392,
44040 0.5773392, 0.5773392, 0.5773392,
44041 );
44042 assert_eq_m512(r, e);
44043 }
44044
44045 #[simd_test(enable = "avx512f")]
44046 unsafe fn test_mm512_maskz_rsqrt14_ps() {
44047 let a = _mm512_set1_ps(3.);
44048 let r = _mm512_maskz_rsqrt14_ps(0, a);
44049 assert_eq_m512(r, _mm512_setzero_ps());
44050 let r = _mm512_maskz_rsqrt14_ps(0b11111111_00000000, a);
44051 let e = _mm512_setr_ps(
44052 0., 0., 0., 0., 0., 0., 0., 0., 0.5773392, 0.5773392, 0.5773392, 0.5773392, 0.5773392,
44053 0.5773392, 0.5773392, 0.5773392,
44054 );
44055 assert_eq_m512(r, e);
44056 }
44057
44058 #[simd_test(enable = "avx512f,avx512vl")]
44059 unsafe fn test_mm256_mask_rsqrt14_ps() {
44060 let a = _mm256_set1_ps(3.);
44061 let r = _mm256_mask_rsqrt14_ps(a, 0, a);
44062 assert_eq_m256(r, a);
44063 let r = _mm256_mask_rsqrt14_ps(a, 0b11111111, a);
44064 let e = _mm256_set1_ps(0.5773392);
44065 assert_eq_m256(r, e);
44066 }
44067
44068 #[simd_test(enable = "avx512f,avx512vl")]
44069 unsafe fn test_mm256_maskz_rsqrt14_ps() {
44070 let a = _mm256_set1_ps(3.);
44071 let r = _mm256_maskz_rsqrt14_ps(0, a);
44072 assert_eq_m256(r, _mm256_setzero_ps());
44073 let r = _mm256_maskz_rsqrt14_ps(0b11111111, a);
44074 let e = _mm256_set1_ps(0.5773392);
44075 assert_eq_m256(r, e);
44076 }
44077
44078 #[simd_test(enable = "avx512f,avx512vl")]
44079 unsafe fn test_mm_mask_rsqrt14_ps() {
44080 let a = _mm_set1_ps(3.);
44081 let r = _mm_mask_rsqrt14_ps(a, 0, a);
44082 assert_eq_m128(r, a);
44083 let r = _mm_mask_rsqrt14_ps(a, 0b00001111, a);
44084 let e = _mm_set1_ps(0.5773392);
44085 assert_eq_m128(r, e);
44086 }
44087
44088 #[simd_test(enable = "avx512f,avx512vl")]
44089 unsafe fn test_mm_maskz_rsqrt14_ps() {
44090 let a = _mm_set1_ps(3.);
44091 let r = _mm_maskz_rsqrt14_ps(0, a);
44092 assert_eq_m128(r, _mm_setzero_ps());
44093 let r = _mm_maskz_rsqrt14_ps(0b00001111, a);
44094 let e = _mm_set1_ps(0.5773392);
44095 assert_eq_m128(r, e);
44096 }
44097
44098 #[simd_test(enable = "avx512f")]
44099 unsafe fn test_mm512_getexp_ps() {
44100 let a = _mm512_set1_ps(3.);
44101 let r = _mm512_getexp_ps(a);
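        // getexp returns floor(log2(|x|)) as a float; for 3.0 that is 1.0.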
44102 let e = _mm512_set1_ps(1.);
44103 assert_eq_m512(r, e);
44104 }
44105
44106 #[simd_test(enable = "avx512f")]
44107 unsafe fn test_mm512_mask_getexp_ps() {
44108 let a = _mm512_set1_ps(3.);
44109 let r = _mm512_mask_getexp_ps(a, 0, a);
44110 assert_eq_m512(r, a);
44111 let r = _mm512_mask_getexp_ps(a, 0b11111111_00000000, a);
44112 let e = _mm512_setr_ps(
44113 3., 3., 3., 3., 3., 3., 3., 3., 1., 1., 1., 1., 1., 1., 1., 1.,
44114 );
44115 assert_eq_m512(r, e);
44116 }
44117
44118 #[simd_test(enable = "avx512f")]
44119 unsafe fn test_mm512_maskz_getexp_ps() {
44120 let a = _mm512_set1_ps(3.);
44121 let r = _mm512_maskz_getexp_ps(0, a);
44122 assert_eq_m512(r, _mm512_setzero_ps());
44123 let r = _mm512_maskz_getexp_ps(0b11111111_00000000, a);
44124 let e = _mm512_setr_ps(
44125 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
44126 );
44127 assert_eq_m512(r, e);
44128 }
44129
44130 #[simd_test(enable = "avx512f,avx512vl")]
44131 unsafe fn test_mm256_getexp_ps() {
44132 let a = _mm256_set1_ps(3.);
44133 let r = _mm256_getexp_ps(a);
44134 let e = _mm256_set1_ps(1.);
44135 assert_eq_m256(r, e);
44136 }
44137
44138 #[simd_test(enable = "avx512f,avx512vl")]
44139 unsafe fn test_mm256_mask_getexp_ps() {
44140 let a = _mm256_set1_ps(3.);
44141 let r = _mm256_mask_getexp_ps(a, 0, a);
44142 assert_eq_m256(r, a);
44143 let r = _mm256_mask_getexp_ps(a, 0b11111111, a);
44144 let e = _mm256_set1_ps(1.);
44145 assert_eq_m256(r, e);
44146 }
44147
44148 #[simd_test(enable = "avx512f,avx512vl")]
44149 unsafe fn test_mm256_maskz_getexp_ps() {
44150 let a = _mm256_set1_ps(3.);
44151 let r = _mm256_maskz_getexp_ps(0, a);
44152 assert_eq_m256(r, _mm256_setzero_ps());
44153 let r = _mm256_maskz_getexp_ps(0b11111111, a);
44154 let e = _mm256_set1_ps(1.);
44155 assert_eq_m256(r, e);
44156 }
44157
44158 #[simd_test(enable = "avx512f,avx512vl")]
44159 unsafe fn test_mm_getexp_ps() {
44160 let a = _mm_set1_ps(3.);
44161 let r = _mm_getexp_ps(a);
44162 let e = _mm_set1_ps(1.);
44163 assert_eq_m128(r, e);
44164 }
44165
44166 #[simd_test(enable = "avx512f,avx512vl")]
44167 unsafe fn test_mm_mask_getexp_ps() {
44168 let a = _mm_set1_ps(3.);
44169 let r = _mm_mask_getexp_ps(a, 0, a);
44170 assert_eq_m128(r, a);
44171 let r = _mm_mask_getexp_ps(a, 0b00001111, a);
44172 let e = _mm_set1_ps(1.);
44173 assert_eq_m128(r, e);
44174 }
44175
44176 #[simd_test(enable = "avx512f,avx512vl")]
44177 unsafe fn test_mm_maskz_getexp_ps() {
44178 let a = _mm_set1_ps(3.);
44179 let r = _mm_maskz_getexp_ps(0, a);
44180 assert_eq_m128(r, _mm_setzero_ps());
44181 let r = _mm_maskz_getexp_ps(0b00001111, a);
44182 let e = _mm_set1_ps(1.);
44183 assert_eq_m128(r, e);
44184 }
44185
44186 #[simd_test(enable = "avx512f")]
44187 unsafe fn test_mm512_roundscale_ps() {
44188 let a = _mm512_set1_ps(1.1);
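        // An immediate of 0 keeps zero fraction bits and rounds to nearest,
        // so roundscale turns 1.1 into 1.0.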
44189 let r = _mm512_roundscale_ps::<0b00_00_00_00>(a);
44190 let e = _mm512_set1_ps(1.0);
44191 assert_eq_m512(r, e);
44192 }
44193
44194 #[simd_test(enable = "avx512f")]
44195 unsafe fn test_mm512_mask_roundscale_ps() {
44196 let a = _mm512_set1_ps(1.1);
44197 let r = _mm512_mask_roundscale_ps::<0b00_00_00_00>(a, 0, a);
44198 let e = _mm512_set1_ps(1.1);
44199 assert_eq_m512(r, e);
44200 let r = _mm512_mask_roundscale_ps::<0b00_00_00_00>(a, 0b11111111_11111111, a);
44201 let e = _mm512_set1_ps(1.0);
44202 assert_eq_m512(r, e);
44203 }
44204
44205 #[simd_test(enable = "avx512f")]
44206 unsafe fn test_mm512_maskz_roundscale_ps() {
44207 let a = _mm512_set1_ps(1.1);
44208 let r = _mm512_maskz_roundscale_ps::<0b00_00_00_00>(0, a);
44209 assert_eq_m512(r, _mm512_setzero_ps());
44210 let r = _mm512_maskz_roundscale_ps::<0b00_00_00_00>(0b11111111_11111111, a);
44211 let e = _mm512_set1_ps(1.0);
44212 assert_eq_m512(r, e);
44213 }
44214
44215 #[simd_test(enable = "avx512f,avx512vl")]
44216 unsafe fn test_mm256_roundscale_ps() {
44217 let a = _mm256_set1_ps(1.1);
44218 let r = _mm256_roundscale_ps::<0b00_00_00_00>(a);
44219 let e = _mm256_set1_ps(1.0);
44220 assert_eq_m256(r, e);
44221 }
44222
44223 #[simd_test(enable = "avx512f,avx512vl")]
44224 unsafe fn test_mm256_mask_roundscale_ps() {
44225 let a = _mm256_set1_ps(1.1);
44226 let r = _mm256_mask_roundscale_ps::<0b00_00_00_00>(a, 0, a);
44227 let e = _mm256_set1_ps(1.1);
44228 assert_eq_m256(r, e);
44229 let r = _mm256_mask_roundscale_ps::<0b00_00_00_00>(a, 0b11111111, a);
44230 let e = _mm256_set1_ps(1.0);
44231 assert_eq_m256(r, e);
44232 }
44233
44234 #[simd_test(enable = "avx512f,avx512vl")]
44235 unsafe fn test_mm256_maskz_roundscale_ps() {
44236 let a = _mm256_set1_ps(1.1);
44237 let r = _mm256_maskz_roundscale_ps::<0b00_00_00_00>(0, a);
44238 assert_eq_m256(r, _mm256_setzero_ps());
44239 let r = _mm256_maskz_roundscale_ps::<0b00_00_00_00>(0b11111111, a);
44240 let e = _mm256_set1_ps(1.0);
44241 assert_eq_m256(r, e);
44242 }
44243
44244 #[simd_test(enable = "avx512f,avx512vl")]
44245 unsafe fn test_mm_roundscale_ps() {
44246 let a = _mm_set1_ps(1.1);
44247 let r = _mm_roundscale_ps::<0b00_00_00_00>(a);
44248 let e = _mm_set1_ps(1.0);
44249 assert_eq_m128(r, e);
44250 }
44251
44252 #[simd_test(enable = "avx512f,avx512vl")]
44253 unsafe fn test_mm_mask_roundscale_ps() {
44254 let a = _mm_set1_ps(1.1);
44255 let r = _mm_mask_roundscale_ps::<0b00_00_00_00>(a, 0, a);
44256 let e = _mm_set1_ps(1.1);
44257 assert_eq_m128(r, e);
44258 let r = _mm_mask_roundscale_ps::<0b00_00_00_00>(a, 0b00001111, a);
44259 let e = _mm_set1_ps(1.0);
44260 assert_eq_m128(r, e);
44261 }
44262
44263 #[simd_test(enable = "avx512f,avx512vl")]
44264 unsafe fn test_mm_maskz_roundscale_ps() {
44265 let a = _mm_set1_ps(1.1);
44266 let r = _mm_maskz_roundscale_ps::<0b00_00_00_00>(0, a);
44267 assert_eq_m128(r, _mm_setzero_ps());
44268 let r = _mm_maskz_roundscale_ps::<0b00_00_00_00>(0b00001111, a);
44269 let e = _mm_set1_ps(1.0);
44270 assert_eq_m128(r, e);
44271 }
44272
44273 #[simd_test(enable = "avx512f")]
44274 unsafe fn test_mm512_scalef_ps() {
44275 let a = _mm512_set1_ps(1.);
44276 let b = _mm512_set1_ps(3.);
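        // scalef computes a * 2^floor(b); with a = 1 and b = 3 every element is 8.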
44277 let r = _mm512_scalef_ps(a, b);
44278 let e = _mm512_set1_ps(8.);
44279 assert_eq_m512(r, e);
44280 }
44281
44282 #[simd_test(enable = "avx512f")]
44283 unsafe fn test_mm512_mask_scalef_ps() {
44284 let a = _mm512_set1_ps(1.);
44285 let b = _mm512_set1_ps(3.);
44286 let r = _mm512_mask_scalef_ps(a, 0, a, b);
44287 assert_eq_m512(r, a);
44288 let r = _mm512_mask_scalef_ps(a, 0b11111111_00000000, a, b);
44289 let e = _mm512_set_ps(
44290 8., 8., 8., 8., 8., 8., 8., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
44291 );
44292 assert_eq_m512(r, e);
44293 }
44294
44295 #[simd_test(enable = "avx512f")]
44296 unsafe fn test_mm512_maskz_scalef_ps() {
44297 let a = _mm512_set1_ps(1.);
44298 let b = _mm512_set1_ps(3.);
44299 let r = _mm512_maskz_scalef_ps(0, a, b);
44300 assert_eq_m512(r, _mm512_setzero_ps());
44301 let r = _mm512_maskz_scalef_ps(0b11111111_00000000, a, b);
44302 let e = _mm512_set_ps(
44303 8., 8., 8., 8., 8., 8., 8., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
44304 );
44305 assert_eq_m512(r, e);
44306 }
44307
44308 #[simd_test(enable = "avx512f,avx512vl")]
44309 unsafe fn test_mm256_scalef_ps() {
44310 let a = _mm256_set1_ps(1.);
44311 let b = _mm256_set1_ps(3.);
44312 let r = _mm256_scalef_ps(a, b);
44313 let e = _mm256_set1_ps(8.);
44314 assert_eq_m256(r, e);
44315 }
44316
44317 #[simd_test(enable = "avx512f,avx512vl")]
44318 unsafe fn test_mm256_mask_scalef_ps() {
44319 let a = _mm256_set1_ps(1.);
44320 let b = _mm256_set1_ps(3.);
44321 let r = _mm256_mask_scalef_ps(a, 0, a, b);
44322 assert_eq_m256(r, a);
44323 let r = _mm256_mask_scalef_ps(a, 0b11111111, a, b);
44324 let e = _mm256_set1_ps(8.);
44325 assert_eq_m256(r, e);
44326 }
44327
44328 #[simd_test(enable = "avx512f,avx512vl")]
44329 unsafe fn test_mm256_maskz_scalef_ps() {
44330 let a = _mm256_set1_ps(1.);
44331 let b = _mm256_set1_ps(3.);
44332 let r = _mm256_maskz_scalef_ps(0, a, b);
44333 assert_eq_m256(r, _mm256_setzero_ps());
44334 let r = _mm256_maskz_scalef_ps(0b11111111, a, b);
44335 let e = _mm256_set1_ps(8.);
44336 assert_eq_m256(r, e);
44337 }
44338
44339 #[simd_test(enable = "avx512f,avx512vl")]
44340 unsafe fn test_mm_scalef_ps() {
44341 let a = _mm_set1_ps(1.);
44342 let b = _mm_set1_ps(3.);
44343 let r = _mm_scalef_ps(a, b);
44344 let e = _mm_set1_ps(8.);
44345 assert_eq_m128(r, e);
44346 }
44347
44348 #[simd_test(enable = "avx512f,avx512vl")]
44349 unsafe fn test_mm_mask_scalef_ps() {
44350 let a = _mm_set1_ps(1.);
44351 let b = _mm_set1_ps(3.);
44352 let r = _mm_mask_scalef_ps(a, 0, a, b);
44353 assert_eq_m128(r, a);
44354 let r = _mm_mask_scalef_ps(a, 0b00001111, a, b);
44355 let e = _mm_set1_ps(8.);
44356 assert_eq_m128(r, e);
44357 }
44358
44359 #[simd_test(enable = "avx512f,avx512vl")]
44360 unsafe fn test_mm_maskz_scalef_ps() {
44361 let a = _mm_set1_ps(1.);
44362 let b = _mm_set1_ps(3.);
44363 let r = _mm_maskz_scalef_ps(0, a, b);
44364 assert_eq_m128(r, _mm_setzero_ps());
44365 let r = _mm_maskz_scalef_ps(0b00001111, a, b);
44366 let e = _mm_set1_ps(8.);
44367 assert_eq_m128(r, e);
44368 }
44369
44370 #[simd_test(enable = "avx512f")]
44371 unsafe fn test_mm512_fixupimm_ps() {
44372 let a = _mm512_set1_ps(f32::NAN);
44373 let b = _mm512_set1_ps(f32::MAX);
44374 let c = _mm512_set1_epi32(i32::MAX);
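// With imm8 = 5 and this control table the NaN inputs are rewritten to +0.0, as asserted below.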
44376 let r = _mm512_fixupimm_ps::<5>(a, b, c);
44377 let e = _mm512_set1_ps(0.0);
44378 assert_eq_m512(r, e);
44379 }
44380
44381 #[simd_test(enable = "avx512f")]
44382 unsafe fn test_mm512_mask_fixupimm_ps() {
44383 #[rustfmt::skip]
44384 let a = _mm512_set_ps(
44385 f32::NAN, f32::NAN, f32::NAN, f32::NAN,
44386 f32::NAN, f32::NAN, f32::NAN, f32::NAN,
44387 1., 1., 1., 1.,
44388 1., 1., 1., 1.,
44389 );
44390 let b = _mm512_set1_ps(f32::MAX);
44391 let c = _mm512_set1_epi32(i32::MAX);
44392 let r = _mm512_mask_fixupimm_ps::<5>(a, 0b11111111_00000000, b, c);
44393 let e = _mm512_set_ps(
44394 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
44395 );
44396 assert_eq_m512(r, e);
44397 }
44398
44399 #[simd_test(enable = "avx512f")]
44400 unsafe fn test_mm512_maskz_fixupimm_ps() {
44401 #[rustfmt::skip]
44402 let a = _mm512_set_ps(
44403 f32::NAN, f32::NAN, f32::NAN, f32::NAN,
44404 f32::NAN, f32::NAN, f32::NAN, f32::NAN,
44405 1., 1., 1., 1.,
44406 1., 1., 1., 1.,
44407 );
44408 let b = _mm512_set1_ps(f32::MAX);
44409 let c = _mm512_set1_epi32(i32::MAX);
44410 let r = _mm512_maskz_fixupimm_ps::<5>(0b11111111_00000000, a, b, c);
44411 let e = _mm512_set_ps(
44412 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
44413 );
44414 assert_eq_m512(r, e);
44415 }
44416
44417 #[simd_test(enable = "avx512f,avx512vl")]
44418 unsafe fn test_mm256_fixupimm_ps() {
44419 let a = _mm256_set1_ps(f32::NAN);
44420 let b = _mm256_set1_ps(f32::MAX);
44421 let c = _mm256_set1_epi32(i32::MAX);
44422 let r = _mm256_fixupimm_ps::<5>(a, b, c);
44423 let e = _mm256_set1_ps(0.0);
44424 assert_eq_m256(r, e);
44425 }
44426
44427 #[simd_test(enable = "avx512f,avx512vl")]
44428 unsafe fn test_mm256_mask_fixupimm_ps() {
44429 let a = _mm256_set1_ps(f32::NAN);
44430 let b = _mm256_set1_ps(f32::MAX);
44431 let c = _mm256_set1_epi32(i32::MAX);
44432 let r = _mm256_mask_fixupimm_ps::<5>(a, 0b11111111, b, c);
44433 let e = _mm256_set1_ps(0.0);
44434 assert_eq_m256(r, e);
44435 }
44436
44437 #[simd_test(enable = "avx512f,avx512vl")]
44438 unsafe fn test_mm256_maskz_fixupimm_ps() {
44439 let a = _mm256_set1_ps(f32::NAN);
44440 let b = _mm256_set1_ps(f32::MAX);
44441 let c = _mm256_set1_epi32(i32::MAX);
44442 let r = _mm256_maskz_fixupimm_ps::<5>(0b11111111, a, b, c);
44443 let e = _mm256_set1_ps(0.0);
44444 assert_eq_m256(r, e);
44445 }
44446
44447 #[simd_test(enable = "avx512f,avx512vl")]
44448 unsafe fn test_mm_fixupimm_ps() {
44449 let a = _mm_set1_ps(f32::NAN);
44450 let b = _mm_set1_ps(f32::MAX);
44451 let c = _mm_set1_epi32(i32::MAX);
44452 let r = _mm_fixupimm_ps::<5>(a, b, c);
44453 let e = _mm_set1_ps(0.0);
44454 assert_eq_m128(r, e);
44455 }
44456
44457 #[simd_test(enable = "avx512f,avx512vl")]
44458 unsafe fn test_mm_mask_fixupimm_ps() {
44459 let a = _mm_set1_ps(f32::NAN);
44460 let b = _mm_set1_ps(f32::MAX);
44461 let c = _mm_set1_epi32(i32::MAX);
44462 let r = _mm_mask_fixupimm_ps::<5>(a, 0b00001111, b, c);
44463 let e = _mm_set1_ps(0.0);
44464 assert_eq_m128(r, e);
44465 }
44466
44467 #[simd_test(enable = "avx512f,avx512vl")]
44468 unsafe fn test_mm_maskz_fixupimm_ps() {
44469 let a = _mm_set1_ps(f32::NAN);
44470 let b = _mm_set1_ps(f32::MAX);
44471 let c = _mm_set1_epi32(i32::MAX);
44472 let r = _mm_maskz_fixupimm_ps::<5>(0b00001111, a, b, c);
44473 let e = _mm_set1_ps(0.0);
44474 assert_eq_m128(r, e);
44475 }
44476
44477 #[simd_test(enable = "avx512f")]
44478 unsafe fn test_mm512_ternarylogic_epi32() {
44479 let a = _mm512_set1_epi32(1 << 2);
44480 let b = _mm512_set1_epi32(1 << 1);
44481 let c = _mm512_set1_epi32(1 << 0);
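// Each result bit is imm8[(a << 2) | (b << 1) | c]; imm8 = 8 is set only for (a, b, c) = (0, 1, 1),
// and the three inputs have disjoint set bits, so every lane comes out 0.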
44482 let r = _mm512_ternarylogic_epi32::<8>(a, b, c);
44483 let e = _mm512_set1_epi32(0);
44484 assert_eq_m512i(r, e);
44485 }
44486
44487 #[simd_test(enable = "avx512f")]
44488 unsafe fn test_mm512_mask_ternarylogic_epi32() {
44489 let src = _mm512_set1_epi32(1 << 2);
44490 let a = _mm512_set1_epi32(1 << 1);
44491 let b = _mm512_set1_epi32(1 << 0);
44492 let r = _mm512_mask_ternarylogic_epi32::<8>(src, 0, a, b);
44493 assert_eq_m512i(r, src);
44494 let r = _mm512_mask_ternarylogic_epi32::<8>(src, 0b11111111_11111111, a, b);
44495 let e = _mm512_set1_epi32(0);
44496 assert_eq_m512i(r, e);
44497 }
44498
44499 #[simd_test(enable = "avx512f")]
44500 unsafe fn test_mm512_maskz_ternarylogic_epi32() {
44501 let a = _mm512_set1_epi32(1 << 2);
44502 let b = _mm512_set1_epi32(1 << 1);
44503 let c = _mm512_set1_epi32(1 << 0);
44504 let r = _mm512_maskz_ternarylogic_epi32::<9>(0, a, b, c);
44505 assert_eq_m512i(r, _mm512_setzero_si512());
44506 let r = _mm512_maskz_ternarylogic_epi32::<8>(0b11111111_11111111, a, b, c);
44507 let e = _mm512_set1_epi32(0);
44508 assert_eq_m512i(r, e);
44509 }
44510
44511 #[simd_test(enable = "avx512f,avx512vl")]
44512 unsafe fn test_mm256_ternarylogic_epi32() {
44513 let a = _mm256_set1_epi32(1 << 2);
44514 let b = _mm256_set1_epi32(1 << 1);
44515 let c = _mm256_set1_epi32(1 << 0);
44516 let r = _mm256_ternarylogic_epi32::<8>(a, b, c);
44517 let e = _mm256_set1_epi32(0);
44518 assert_eq_m256i(r, e);
44519 }
44520
44521 #[simd_test(enable = "avx512f,avx512vl")]
44522 unsafe fn test_mm256_mask_ternarylogic_epi32() {
44523 let src = _mm256_set1_epi32(1 << 2);
44524 let a = _mm256_set1_epi32(1 << 1);
44525 let b = _mm256_set1_epi32(1 << 0);
44526 let r = _mm256_mask_ternarylogic_epi32::<8>(src, 0, a, b);
44527 assert_eq_m256i(r, src);
44528 let r = _mm256_mask_ternarylogic_epi32::<8>(src, 0b11111111, a, b);
44529 let e = _mm256_set1_epi32(0);
44530 assert_eq_m256i(r, e);
44531 }
44532
44533 #[simd_test(enable = "avx512f,avx512vl")]
44534 unsafe fn test_mm256_maskz_ternarylogic_epi32() {
44535 let a = _mm256_set1_epi32(1 << 2);
44536 let b = _mm256_set1_epi32(1 << 1);
44537 let c = _mm256_set1_epi32(1 << 0);
44538 let r = _mm256_maskz_ternarylogic_epi32::<9>(0, a, b, c);
44539 assert_eq_m256i(r, _mm256_setzero_si256());
44540 let r = _mm256_maskz_ternarylogic_epi32::<8>(0b11111111, a, b, c);
44541 let e = _mm256_set1_epi32(0);
44542 assert_eq_m256i(r, e);
44543 }
44544
44545 #[simd_test(enable = "avx512f,avx512vl")]
44546 unsafe fn test_mm_ternarylogic_epi32() {
44547 let a = _mm_set1_epi32(1 << 2);
44548 let b = _mm_set1_epi32(1 << 1);
44549 let c = _mm_set1_epi32(1 << 0);
44550 let r = _mm_ternarylogic_epi32::<8>(a, b, c);
44551 let e = _mm_set1_epi32(0);
44552 assert_eq_m128i(r, e);
44553 }
44554
44555 #[simd_test(enable = "avx512f,avx512vl")]
44556 unsafe fn test_mm_mask_ternarylogic_epi32() {
44557 let src = _mm_set1_epi32(1 << 2);
44558 let a = _mm_set1_epi32(1 << 1);
44559 let b = _mm_set1_epi32(1 << 0);
44560 let r = _mm_mask_ternarylogic_epi32::<8>(src, 0, a, b);
44561 assert_eq_m128i(r, src);
44562 let r = _mm_mask_ternarylogic_epi32::<8>(src, 0b00001111, a, b);
44563 let e = _mm_set1_epi32(0);
44564 assert_eq_m128i(r, e);
44565 }
44566
44567 #[simd_test(enable = "avx512f,avx512vl")]
44568 unsafe fn test_mm_maskz_ternarylogic_epi32() {
44569 let a = _mm_set1_epi32(1 << 2);
44570 let b = _mm_set1_epi32(1 << 1);
44571 let c = _mm_set1_epi32(1 << 0);
44572 let r = _mm_maskz_ternarylogic_epi32::<9>(0, a, b, c);
44573 assert_eq_m128i(r, _mm_setzero_si128());
44574 let r = _mm_maskz_ternarylogic_epi32::<8>(0b00001111, a, b, c);
44575 let e = _mm_set1_epi32(0);
44576 assert_eq_m128i(r, e);
44577 }
44578
44579 #[simd_test(enable = "avx512f")]
44580 unsafe fn test_mm512_getmant_ps() {
44581 let a = _mm512_set1_ps(10.);
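// 10.0 = 1.25 * 2^3 and 1.25 already lies in the requested normalization interval, so the
// extracted mantissa is 1.25.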
44582 let r = _mm512_getmant_ps::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
44583 let e = _mm512_set1_ps(1.25);
44584 assert_eq_m512(r, e);
44585 }
44586
44587 #[simd_test(enable = "avx512f")]
44588 unsafe fn test_mm512_mask_getmant_ps() {
44589 let a = _mm512_set1_ps(10.);
44590 let r = _mm512_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a);
44591 assert_eq_m512(r, a);
44592 let r = _mm512_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(
44593 a,
44594 0b11111111_00000000,
44595 a,
44596 );
44597 let e = _mm512_setr_ps(
44598 10., 10., 10., 10., 10., 10., 10., 10., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
44599 );
44600 assert_eq_m512(r, e);
44601 }
44602
44603 #[simd_test(enable = "avx512f")]
44604 unsafe fn test_mm512_maskz_getmant_ps() {
44605 let a = _mm512_set1_ps(10.);
44606 let r = _mm512_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a);
44607 assert_eq_m512(r, _mm512_setzero_ps());
44608 let r =
44609 _mm512_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b11111111_00000000, a);
44610 let e = _mm512_setr_ps(
44611 0., 0., 0., 0., 0., 0., 0., 0., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
44612 );
44613 assert_eq_m512(r, e);
44614 }
44615
44616 #[simd_test(enable = "avx512f,avx512vl")]
44617 unsafe fn test_mm256_getmant_ps() {
44618 let a = _mm256_set1_ps(10.);
44619 let r = _mm256_getmant_ps::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
44620 let e = _mm256_set1_ps(1.25);
44621 assert_eq_m256(r, e);
44622 }
44623
44624 #[simd_test(enable = "avx512f,avx512vl")]
44625 unsafe fn test_mm256_mask_getmant_ps() {
44626 let a = _mm256_set1_ps(10.);
44627 let r = _mm256_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a);
44628 assert_eq_m256(r, a);
44629 let r = _mm256_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b11111111, a);
44630 let e = _mm256_set1_ps(1.25);
44631 assert_eq_m256(r, e);
44632 }
44633
44634 #[simd_test(enable = "avx512f,avx512vl")]
44635 unsafe fn test_mm256_maskz_getmant_ps() {
44636 let a = _mm256_set1_ps(10.);
44637 let r = _mm256_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a);
44638 assert_eq_m256(r, _mm256_setzero_ps());
44639 let r = _mm256_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b11111111, a);
44640 let e = _mm256_set1_ps(1.25);
44641 assert_eq_m256(r, e);
44642 }
44643
44644 #[simd_test(enable = "avx512f,avx512vl")]
44645 unsafe fn test_mm_getmant_ps() {
44646 let a = _mm_set1_ps(10.);
44647 let r = _mm_getmant_ps::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
44648 let e = _mm_set1_ps(1.25);
44649 assert_eq_m128(r, e);
44650 }
44651
44652 #[simd_test(enable = "avx512f,avx512vl")]
44653 unsafe fn test_mm_mask_getmant_ps() {
44654 let a = _mm_set1_ps(10.);
44655 let r = _mm_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a);
44656 assert_eq_m128(r, a);
44657 let r = _mm_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b00001111, a);
44658 let e = _mm_set1_ps(1.25);
44659 assert_eq_m128(r, e);
44660 }
44661
44662 #[simd_test(enable = "avx512f,avx512vl")]
44663 unsafe fn test_mm_maskz_getmant_ps() {
44664 let a = _mm_set1_ps(10.);
44665 let r = _mm_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a);
44666 assert_eq_m128(r, _mm_setzero_ps());
44667 let r = _mm_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b00001111, a);
44668 let e = _mm_set1_ps(1.25);
44669 assert_eq_m128(r, e);
44670 }
44671
44672 #[simd_test(enable = "avx512f")]
44673 unsafe fn test_mm512_add_round_ps() {
44674 let a = _mm512_setr_ps(
44675 0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
44676 );
44677 let b = _mm512_set1_ps(-1.);
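// The interesting lane is the last one: 0.00000007 - 1 is not exactly representable in f32,
// so round-to-nearest gives -0.99999994 while round-toward-zero gives -0.9999999.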
44678 let r = _mm512_add_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
44679 #[rustfmt::skip]
44680 let e = _mm512_setr_ps(
44681 -1., 0.5, 1., 2.5,
44682 3., 4.5, 5., 6.5,
44683 7., 8.5, 9., 10.5,
44684 11., 12.5, 13., -0.99999994,
44685 );
44686 assert_eq_m512(r, e);
44687 let r = _mm512_add_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
44688 let e = _mm512_setr_ps(
44689 -1., 0.5, 1., 2.5, 3., 4.5, 5., 6.5, 7., 8.5, 9., 10.5, 11., 12.5, 13., -0.9999999,
44690 );
44691 assert_eq_m512(r, e);
44692 }
44693
44694 #[simd_test(enable = "avx512f")]
44695 unsafe fn test_mm512_mask_add_round_ps() {
44696 let a = _mm512_setr_ps(
44697 0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
44698 );
44699 let b = _mm512_set1_ps(-1.);
44700 let r = _mm512_mask_add_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, 0, a, b);
44701 assert_eq_m512(r, a);
44702 let r = _mm512_mask_add_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
44703 a,
44704 0b11111111_00000000,
44705 a,
44706 b,
44707 );
44708 #[rustfmt::skip]
44709 let e = _mm512_setr_ps(
44710 0., 1.5, 2., 3.5,
44711 4., 5.5, 6., 7.5,
44712 7., 8.5, 9., 10.5,
44713 11., 12.5, 13., -0.99999994,
44714 );
44715 assert_eq_m512(r, e);
44716 }
44717
44718 #[simd_test(enable = "avx512f")]
44719 unsafe fn test_mm512_maskz_add_round_ps() {
44720 let a = _mm512_setr_ps(
44721 0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
44722 );
44723 let b = _mm512_set1_ps(-1.);
44724 let r = _mm512_maskz_add_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
44725 assert_eq_m512(r, _mm512_setzero_ps());
44726 let r = _mm512_maskz_add_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
44727 0b11111111_00000000,
44728 a,
44729 b,
44730 );
44731 #[rustfmt::skip]
44732 let e = _mm512_setr_ps(
44733 0., 0., 0., 0.,
44734 0., 0., 0., 0.,
44735 7., 8.5, 9., 10.5,
44736 11., 12.5, 13., -0.99999994,
44737 );
44738 assert_eq_m512(r, e);
44739 }
44740
44741 #[simd_test(enable = "avx512f")]
44742 unsafe fn test_mm512_sub_round_ps() {
44743 let a = _mm512_setr_ps(
44744 0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
44745 );
44746 let b = _mm512_set1_ps(1.);
44747 let r = _mm512_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
44748 #[rustfmt::skip]
44749 let e = _mm512_setr_ps(
44750 -1., 0.5, 1., 2.5,
44751 3., 4.5, 5., 6.5,
44752 7., 8.5, 9., 10.5,
44753 11., 12.5, 13., -0.99999994,
44754 );
44755 assert_eq_m512(r, e);
44756 let r = _mm512_sub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
44757 let e = _mm512_setr_ps(
44758 -1., 0.5, 1., 2.5, 3., 4.5, 5., 6.5, 7., 8.5, 9., 10.5, 11., 12.5, 13., -0.9999999,
44759 );
44760 assert_eq_m512(r, e);
44761 }
44762
44763 #[simd_test(enable = "avx512f")]
44764 unsafe fn test_mm512_mask_sub_round_ps() {
44765 let a = _mm512_setr_ps(
44766 0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
44767 );
44768 let b = _mm512_set1_ps(1.);
44769 let r = _mm512_mask_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
44770 a, 0, a, b,
44771 );
44772 assert_eq_m512(r, a);
44773 let r = _mm512_mask_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
44774 a,
44775 0b11111111_00000000,
44776 a,
44777 b,
44778 );
44779 #[rustfmt::skip]
44780 let e = _mm512_setr_ps(
44781 0., 1.5, 2., 3.5,
44782 4., 5.5, 6., 7.5,
44783 7., 8.5, 9., 10.5,
44784 11., 12.5, 13., -0.99999994,
44785 );
44786 assert_eq_m512(r, e);
44787 }
44788
44789 #[simd_test(enable = "avx512f")]
44790 unsafe fn test_mm512_maskz_sub_round_ps() {
44791 let a = _mm512_setr_ps(
44792 0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
44793 );
44794 let b = _mm512_set1_ps(1.);
44795 let r =
44796 _mm512_maskz_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
44797 assert_eq_m512(r, _mm512_setzero_ps());
44798 let r = _mm512_maskz_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
44799 0b11111111_00000000,
44800 a,
44801 b,
44802 );
44803 #[rustfmt::skip]
44804 let e = _mm512_setr_ps(
44805 0., 0., 0., 0.,
44806 0., 0., 0., 0.,
44807 7., 8.5, 9., 10.5,
44808 11., 12.5, 13., -0.99999994,
44809 );
44810 assert_eq_m512(r, e);
44811 }
44812
44813 #[simd_test(enable = "avx512f")]
44814 unsafe fn test_mm512_mul_round_ps() {
44815 #[rustfmt::skip]
44816 let a = _mm512_setr_ps(
44817 0., 1.5, 2., 3.5,
44818 4., 5.5, 6., 7.5,
44819 8., 9.5, 10., 11.5,
44820 12., 13.5, 14., 0.00000000000000000000007,
44821 );
44822 let b = _mm512_set1_ps(0.1);
44823 let r = _mm512_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
44824 #[rustfmt::skip]
44825 let e = _mm512_setr_ps(
44826 0., 0.15, 0.2, 0.35,
44827 0.4, 0.55, 0.6, 0.75,
44828 0.8, 0.95, 1.0, 1.15,
44829 1.2, 1.35, 1.4, 0.000000000000000000000007000001,
44830 );
44831 assert_eq_m512(r, e);
44832 let r = _mm512_mul_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
44833 #[rustfmt::skip]
44834 let e = _mm512_setr_ps(
44835 0., 0.14999999, 0.2, 0.35,
44836 0.4, 0.54999995, 0.59999996, 0.75,
44837 0.8, 0.95, 1.0, 1.15,
44838 1.1999999, 1.3499999, 1.4, 0.000000000000000000000007,
44839 );
44840 assert_eq_m512(r, e);
44841 }
44842
44843 #[simd_test(enable = "avx512f")]
44844 unsafe fn test_mm512_mask_mul_round_ps() {
44845 #[rustfmt::skip]
44846 let a = _mm512_setr_ps(
44847 0., 1.5, 2., 3.5,
44848 4., 5.5, 6., 7.5,
44849 8., 9.5, 10., 11.5,
44850 12., 13.5, 14., 0.00000000000000000000007,
44851 );
44852 let b = _mm512_set1_ps(0.1);
44853 let r = _mm512_mask_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
44854 a, 0, a, b,
44855 );
44856 assert_eq_m512(r, a);
44857 let r = _mm512_mask_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
44858 a,
44859 0b11111111_00000000,
44860 a,
44861 b,
44862 );
44863 #[rustfmt::skip]
44864 let e = _mm512_setr_ps(
44865 0., 1.5, 2., 3.5,
44866 4., 5.5, 6., 7.5,
44867 0.8, 0.95, 1.0, 1.15,
44868 1.2, 1.35, 1.4, 0.000000000000000000000007000001,
44869 );
44870 assert_eq_m512(r, e);
44871 }
44872
44873 #[simd_test(enable = "avx512f")]
44874 unsafe fn test_mm512_maskz_mul_round_ps() {
44875 #[rustfmt::skip]
44876 let a = _mm512_setr_ps(
44877 0., 1.5, 2., 3.5,
44878 4., 5.5, 6., 7.5,
44879 8., 9.5, 10., 11.5,
44880 12., 13.5, 14., 0.00000000000000000000007,
44881 );
44882 let b = _mm512_set1_ps(0.1);
44883 let r =
44884 _mm512_maskz_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
44885 assert_eq_m512(r, _mm512_setzero_ps());
44886 let r = _mm512_maskz_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
44887 0b11111111_00000000,
44888 a,
44889 b,
44890 );
44891 #[rustfmt::skip]
44892 let e = _mm512_setr_ps(
44893 0., 0., 0., 0.,
44894 0., 0., 0., 0.,
44895 0.8, 0.95, 1.0, 1.15,
44896 1.2, 1.35, 1.4, 0.000000000000000000000007000001,
44897 );
44898 assert_eq_m512(r, e);
44899 }
44900
44901 #[simd_test(enable = "avx512f")]
44902 unsafe fn test_mm512_div_round_ps() {
44903 let a = _mm512_set1_ps(1.);
44904 let b = _mm512_set1_ps(3.);
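// 1/3 rounded to the nearest f32 is 0.33333334; rounding toward zero gives 0.3333333.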
44905 let r = _mm512_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
44906 let e = _mm512_set1_ps(0.33333334);
44907 assert_eq_m512(r, e);
44908 let r = _mm512_div_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
44909 let e = _mm512_set1_ps(0.3333333);
44910 assert_eq_m512(r, e);
44911 }
44912
44913 #[simd_test(enable = "avx512f")]
44914 unsafe fn test_mm512_mask_div_round_ps() {
44915 let a = _mm512_set1_ps(1.);
44916 let b = _mm512_set1_ps(3.);
44917 let r = _mm512_mask_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
44918 a, 0, a, b,
44919 );
44920 assert_eq_m512(r, a);
44921 let r = _mm512_mask_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
44922 a,
44923 0b11111111_00000000,
44924 a,
44925 b,
44926 );
44927 let e = _mm512_setr_ps(
44928 1., 1., 1., 1., 1., 1., 1., 1., 0.33333334, 0.33333334, 0.33333334, 0.33333334,
44929 0.33333334, 0.33333334, 0.33333334, 0.33333334,
44930 );
44931 assert_eq_m512(r, e);
44932 }
44933
44934 #[simd_test(enable = "avx512f")]
44935 unsafe fn test_mm512_maskz_div_round_ps() {
44936 let a = _mm512_set1_ps(1.);
44937 let b = _mm512_set1_ps(3.);
44938 let r =
44939 _mm512_maskz_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
44940 assert_eq_m512(r, _mm512_setzero_ps());
44941 let r = _mm512_maskz_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
44942 0b11111111_00000000,
44943 a,
44944 b,
44945 );
44946 let e = _mm512_setr_ps(
44947 0., 0., 0., 0., 0., 0., 0., 0., 0.33333334, 0.33333334, 0.33333334, 0.33333334,
44948 0.33333334, 0.33333334, 0.33333334, 0.33333334,
44949 );
44950 assert_eq_m512(r, e);
44951 }
44952
44953 #[simd_test(enable = "avx512f")]
44954 unsafe fn test_mm512_sqrt_round_ps() {
44955 let a = _mm512_set1_ps(3.);
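// sqrt(3) falls between the f32 values 1.7320508 and 1.7320509; the embedded rounding mode
// decides which neighbour is returned.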
44956 let r = _mm512_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
44957 let e = _mm512_set1_ps(1.7320508);
44958 assert_eq_m512(r, e);
44959 let r = _mm512_sqrt_round_ps::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a);
44960 let e = _mm512_set1_ps(1.7320509);
44961 assert_eq_m512(r, e);
44962 }
44963
44964 #[simd_test(enable = "avx512f")]
44965 unsafe fn test_mm512_mask_sqrt_round_ps() {
44966 let a = _mm512_set1_ps(3.);
44967 let r =
44968 _mm512_mask_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, 0, a);
44969 assert_eq_m512(r, a);
44970 let r = _mm512_mask_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
44971 a,
44972 0b11111111_00000000,
44973 a,
44974 );
44975 let e = _mm512_setr_ps(
44976 3., 3., 3., 3., 3., 3., 3., 3., 1.7320508, 1.7320508, 1.7320508, 1.7320508, 1.7320508,
44977 1.7320508, 1.7320508, 1.7320508,
44978 );
44979 assert_eq_m512(r, e);
44980 }
44981
44982 #[simd_test(enable = "avx512f")]
44983 unsafe fn test_mm512_maskz_sqrt_round_ps() {
44984 let a = _mm512_set1_ps(3.);
44985 let r =
44986 _mm512_maskz_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a);
44987 assert_eq_m512(r, _mm512_setzero_ps());
44988 let r = _mm512_maskz_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
44989 0b11111111_00000000,
44990 a,
44991 );
44992 let e = _mm512_setr_ps(
44993 0., 0., 0., 0., 0., 0., 0., 0., 1.7320508, 1.7320508, 1.7320508, 1.7320508, 1.7320508,
44994 1.7320508, 1.7320508, 1.7320508,
44995 );
44996 assert_eq_m512(r, e);
44997 }
44998
44999 #[simd_test(enable = "avx512f")]
45000 unsafe fn test_mm512_fmadd_round_ps() {
45001 let a = _mm512_set1_ps(0.00000007);
45002 let b = _mm512_set1_ps(1.);
45003 let c = _mm512_set1_ps(-1.);
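// The fused a * b + c is 0.00000007 - 1, so the two rounding modes again yield -0.99999994
// and -0.9999999.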
45004 let r = _mm512_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
45005 let e = _mm512_set1_ps(-0.99999994);
45006 assert_eq_m512(r, e);
45007 let r = _mm512_fmadd_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
45008 let e = _mm512_set1_ps(-0.9999999);
45009 assert_eq_m512(r, e);
45010 }
45011
45012 #[simd_test(enable = "avx512f")]
45013 unsafe fn test_mm512_mask_fmadd_round_ps() {
45014 let a = _mm512_set1_ps(0.00000007);
45015 let b = _mm512_set1_ps(1.);
45016 let c = _mm512_set1_ps(-1.);
45017 let r = _mm512_mask_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
45018 a, 0, b, c,
45019 );
45020 assert_eq_m512(r, a);
45021 let r = _mm512_mask_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
45022 a,
45023 0b00000000_11111111,
45024 b,
45025 c,
45026 );
45027 #[rustfmt::skip]
45028 let e = _mm512_setr_ps(
45029 -0.99999994, -0.99999994, -0.99999994, -0.99999994,
45030 -0.99999994, -0.99999994, -0.99999994, -0.99999994,
45031 0.00000007, 0.00000007, 0.00000007, 0.00000007,
45032 0.00000007, 0.00000007, 0.00000007, 0.00000007,
45033 );
45034 assert_eq_m512(r, e);
45035 }
45036
45037 #[simd_test(enable = "avx512f")]
45038 unsafe fn test_mm512_maskz_fmadd_round_ps() {
45039 let a = _mm512_set1_ps(0.00000007);
45040 let b = _mm512_set1_ps(1.);
45041 let c = _mm512_set1_ps(-1.);
45042 let r = _mm512_maskz_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
45043 0, a, b, c,
45044 );
45045 assert_eq_m512(r, _mm512_setzero_ps());
45046 #[rustfmt::skip]
45047 let r = _mm512_maskz_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
45048 0b00000000_11111111,
45049 a,
45050 b,
45051 c,
45052 );
45053 #[rustfmt::skip]
45054 let e = _mm512_setr_ps(
45055 -0.99999994, -0.99999994, -0.99999994, -0.99999994,
45056 -0.99999994, -0.99999994, -0.99999994, -0.99999994,
45057 0., 0., 0., 0.,
45058 0., 0., 0., 0.,
45059 );
45060 assert_eq_m512(r, e);
45061 }
45062
45063 #[simd_test(enable = "avx512f")]
45064 unsafe fn test_mm512_mask3_fmadd_round_ps() {
45065 let a = _mm512_set1_ps(0.00000007);
45066 let b = _mm512_set1_ps(1.);
45067 let c = _mm512_set1_ps(-1.);
45068 let r = _mm512_mask3_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
45069 a, b, c, 0,
45070 );
45071 assert_eq_m512(r, c);
45072 let r = _mm512_mask3_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
45073 a,
45074 b,
45075 c,
45076 0b00000000_11111111,
45077 );
45078 #[rustfmt::skip]
45079 let e = _mm512_setr_ps(
45080 -0.99999994, -0.99999994, -0.99999994, -0.99999994,
45081 -0.99999994, -0.99999994, -0.99999994, -0.99999994,
45082 -1., -1., -1., -1.,
45083 -1., -1., -1., -1.,
45084 );
45085 assert_eq_m512(r, e);
45086 }
45087
45088 #[simd_test(enable = "avx512f")]
45089 unsafe fn test_mm512_fmsub_round_ps() {
45090 let a = _mm512_set1_ps(0.00000007);
45091 let b = _mm512_set1_ps(1.);
45092 let c = _mm512_set1_ps(1.);
45093 let r = _mm512_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
45094 let e = _mm512_set1_ps(-0.99999994);
45095 assert_eq_m512(r, e);
45096 let r = _mm512_fmsub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
45097 let e = _mm512_set1_ps(-0.9999999);
45098 assert_eq_m512(r, e);
45099 }
45100
45101 #[simd_test(enable = "avx512f")]
45102 unsafe fn test_mm512_mask_fmsub_round_ps() {
45103 let a = _mm512_set1_ps(0.00000007);
45104 let b = _mm512_set1_ps(1.);
45105 let c = _mm512_set1_ps(1.);
45106 let r = _mm512_mask_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
45107 a, 0, b, c,
45108 );
45109 assert_eq_m512(r, a);
45110 let r = _mm512_mask_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
45111 a,
45112 0b00000000_11111111,
45113 b,
45114 c,
45115 );
45116 #[rustfmt::skip]
45117 let e = _mm512_setr_ps(
45118 -0.99999994, -0.99999994, -0.99999994, -0.99999994,
45119 -0.99999994, -0.99999994, -0.99999994, -0.99999994,
45120 0.00000007, 0.00000007, 0.00000007, 0.00000007,
45121 0.00000007, 0.00000007, 0.00000007, 0.00000007,
45122 );
45123 assert_eq_m512(r, e);
45124 }
45125
45126 #[simd_test(enable = "avx512f")]
45127 unsafe fn test_mm512_maskz_fmsub_round_ps() {
45128 let a = _mm512_set1_ps(0.00000007);
45129 let b = _mm512_set1_ps(1.);
45130 let c = _mm512_set1_ps(1.);
45131 let r = _mm512_maskz_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
45132 0, a, b, c,
45133 );
45134 assert_eq_m512(r, _mm512_setzero_ps());
45135 let r = _mm512_maskz_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
45136 0b00000000_11111111,
45137 a,
45138 b,
45139 c,
45140 );
45141 #[rustfmt::skip]
45142 let e = _mm512_setr_ps(
45143 -0.99999994, -0.99999994, -0.99999994, -0.99999994,
45144 -0.99999994, -0.99999994, -0.99999994, -0.99999994,
45145 0., 0., 0., 0.,
45146 0., 0., 0., 0.,
45147 );
45148 assert_eq_m512(r, e);
45149 }
45150
45151 #[simd_test(enable = "avx512f")]
45152 unsafe fn test_mm512_mask3_fmsub_round_ps() {
45153 let a = _mm512_set1_ps(0.00000007);
45154 let b = _mm512_set1_ps(1.);
45155 let c = _mm512_set1_ps(1.);
45156 let r = _mm512_mask3_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
45157 a, b, c, 0,
45158 );
45159 assert_eq_m512(r, c);
45160 let r = _mm512_mask3_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
45161 a,
45162 b,
45163 c,
45164 0b00000000_11111111,
45165 );
45166 #[rustfmt::skip]
45167 let e = _mm512_setr_ps(
45168 -0.99999994, -0.99999994, -0.99999994, -0.99999994,
45169 -0.99999994, -0.99999994, -0.99999994, -0.99999994,
45170 1., 1., 1., 1.,
45171 1., 1., 1., 1.,
45172 );
45173 assert_eq_m512(r, e);
45174 }
45175
45176 #[simd_test(enable = "avx512f")]
45177 unsafe fn test_mm512_fmaddsub_round_ps() {
45178 let a = _mm512_set1_ps(0.00000007);
45179 let b = _mm512_set1_ps(1.);
45180 let c = _mm512_set1_ps(-1.);
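// fmaddsub subtracts c in the even lanes and adds it in the odd lanes, giving the
// alternating 1.0000001 / -0.99999994 pattern below.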
45181 let r =
45182 _mm512_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
45183 #[rustfmt::skip]
45184 let e = _mm512_setr_ps(
45185 1.0000001, -0.99999994, 1.0000001, -0.99999994,
45186 1.0000001, -0.99999994, 1.0000001, -0.99999994,
45187 1.0000001, -0.99999994, 1.0000001, -0.99999994,
45188 1.0000001, -0.99999994, 1.0000001, -0.99999994,
45189 );
45190 assert_eq_m512(r, e);
45191 let r = _mm512_fmaddsub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
45192 let e = _mm512_setr_ps(
45193 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1.,
45194 -0.9999999, 1., -0.9999999, 1., -0.9999999,
45195 );
45196 assert_eq_m512(r, e);
45197 }
45198
45199 #[simd_test(enable = "avx512f")]
45200 unsafe fn test_mm512_mask_fmaddsub_round_ps() {
45201 let a = _mm512_set1_ps(0.00000007);
45202 let b = _mm512_set1_ps(1.);
45203 let c = _mm512_set1_ps(-1.);
45204 let r = _mm512_mask_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
45205 a, 0, b, c,
45206 );
45207 assert_eq_m512(r, a);
45208 let r = _mm512_mask_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
45209 a,
45210 0b00000000_11111111,
45211 b,
45212 c,
45213 );
45214 #[rustfmt::skip]
45215 let e = _mm512_setr_ps(
45216 1.0000001, -0.99999994, 1.0000001, -0.99999994,
45217 1.0000001, -0.99999994, 1.0000001, -0.99999994,
45218 0.00000007, 0.00000007, 0.00000007, 0.00000007,
45219 0.00000007, 0.00000007, 0.00000007, 0.00000007,
45220 );
45221 assert_eq_m512(r, e);
45222 }
45223
45224 #[simd_test(enable = "avx512f")]
45225 unsafe fn test_mm512_maskz_fmaddsub_round_ps() {
45226 let a = _mm512_set1_ps(0.00000007);
45227 let b = _mm512_set1_ps(1.);
45228 let c = _mm512_set1_ps(-1.);
45229 let r = _mm512_maskz_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
45230 0, a, b, c,
45231 );
45232 assert_eq_m512(r, _mm512_setzero_ps());
45233 let r = _mm512_maskz_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
45234 0b00000000_11111111,
45235 a,
45236 b,
45237 c,
45238 );
45239 #[rustfmt::skip]
45240 let e = _mm512_setr_ps(
45241 1.0000001, -0.99999994, 1.0000001, -0.99999994,
45242 1.0000001, -0.99999994, 1.0000001, -0.99999994,
45243 0., 0., 0., 0.,
45244 0., 0., 0., 0.,
45245 );
45246 assert_eq_m512(r, e);
45247 }
45248
45249 #[simd_test(enable = "avx512f")]
45250 unsafe fn test_mm512_mask3_fmaddsub_round_ps() {
45251 let a = _mm512_set1_ps(0.00000007);
45252 let b = _mm512_set1_ps(1.);
45253 let c = _mm512_set1_ps(-1.);
45254 let r = _mm512_mask3_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
45255 a, b, c, 0,
45256 );
45257 assert_eq_m512(r, c);
45258 let r = _mm512_mask3_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
45259 a,
45260 b,
45261 c,
45262 0b00000000_11111111,
45263 );
45264 #[rustfmt::skip]
45265 let e = _mm512_setr_ps(
45266 1.0000001, -0.99999994, 1.0000001, -0.99999994,
45267 1.0000001, -0.99999994, 1.0000001, -0.99999994,
45268 -1., -1., -1., -1.,
45269 -1., -1., -1., -1.,
45270 );
45271 assert_eq_m512(r, e);
45272 }
45273
45274 #[simd_test(enable = "avx512f")]
45275 unsafe fn test_mm512_fmsubadd_round_ps() {
45276 let a = _mm512_set1_ps(0.00000007);
45277 let b = _mm512_set1_ps(1.);
45278 let c = _mm512_set1_ps(-1.);
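// fmsubadd is the mirror image: c is added in the even lanes and subtracted in the odd ones.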
45279 let r =
45280 _mm512_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
45281 #[rustfmt::skip]
45282 let e = _mm512_setr_ps(
45283 -0.99999994, 1.0000001, -0.99999994, 1.0000001,
45284 -0.99999994, 1.0000001, -0.99999994, 1.0000001,
45285 -0.99999994, 1.0000001, -0.99999994, 1.0000001,
45286 -0.99999994, 1.0000001, -0.99999994, 1.0000001,
45287 );
45288 assert_eq_m512(r, e);
45289 let r = _mm512_fmsubadd_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
45290 let e = _mm512_setr_ps(
45291 -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1.,
45292 -0.9999999, 1., -0.9999999, 1., -0.9999999, 1.,
45293 );
45294 assert_eq_m512(r, e);
45295 }
45296
45297 #[simd_test(enable = "avx512f")]
45298 unsafe fn test_mm512_mask_fmsubadd_round_ps() {
45299 let a = _mm512_set1_ps(0.00000007);
45300 let b = _mm512_set1_ps(1.);
45301 let c = _mm512_set1_ps(-1.);
45302 let r = _mm512_mask_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
45303 a, 0, b, c,
45304 );
45305 assert_eq_m512(r, a);
45306 let r = _mm512_mask_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
45307 a,
45308 0b00000000_11111111,
45309 b,
45310 c,
45311 );
45312 #[rustfmt::skip]
45313 let e = _mm512_setr_ps(
45314 -0.99999994, 1.0000001, -0.99999994, 1.0000001,
45315 -0.99999994, 1.0000001, -0.99999994, 1.0000001,
45316 0.00000007, 0.00000007, 0.00000007, 0.00000007,
45317 0.00000007, 0.00000007, 0.00000007, 0.00000007,
45318 );
45319 assert_eq_m512(r, e);
45320 }
45321
45322 #[simd_test(enable = "avx512f")]
45323 unsafe fn test_mm512_maskz_fmsubadd_round_ps() {
45324 let a = _mm512_set1_ps(0.00000007);
45325 let b = _mm512_set1_ps(1.);
45326 let c = _mm512_set1_ps(-1.);
45327 let r = _mm512_maskz_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
45328 0, a, b, c,
45329 );
45330 assert_eq_m512(r, _mm512_setzero_ps());
45331 let r = _mm512_maskz_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
45332 0b00000000_11111111,
45333 a,
45334 b,
45335 c,
45336 );
45337 #[rustfmt::skip]
45338 let e = _mm512_setr_ps(
45339 -0.99999994, 1.0000001, -0.99999994, 1.0000001,
45340 -0.99999994, 1.0000001, -0.99999994, 1.0000001,
45341 0., 0., 0., 0.,
45342 0., 0., 0., 0.,
45343 );
45344 assert_eq_m512(r, e);
45345 }
45346
45347 #[simd_test(enable = "avx512f")]
45348 unsafe fn test_mm512_mask3_fmsubadd_round_ps() {
45349 let a = _mm512_set1_ps(0.00000007);
45350 let b = _mm512_set1_ps(1.);
45351 let c = _mm512_set1_ps(-1.);
45352 let r = _mm512_mask3_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
45353 a, b, c, 0,
45354 );
45355 assert_eq_m512(r, c);
45356 let r = _mm512_mask3_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
45357 a,
45358 b,
45359 c,
45360 0b00000000_11111111,
45361 );
45362 #[rustfmt::skip]
45363 let e = _mm512_setr_ps(
45364 -0.99999994, 1.0000001, -0.99999994, 1.0000001,
45365 -0.99999994, 1.0000001, -0.99999994, 1.0000001,
45366 -1., -1., -1., -1.,
45367 -1., -1., -1., -1.,
45368 );
45369 assert_eq_m512(r, e);
45370 }
45371
45372 #[simd_test(enable = "avx512f")]
45373 unsafe fn test_mm512_fnmadd_round_ps() {
45374 let a = _mm512_set1_ps(0.00000007);
45375 let b = _mm512_set1_ps(1.);
45376 let c = _mm512_set1_ps(1.);
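// fnmadd computes -(a * b) + c = 1 - 0.00000007, which rounds to 0.99999994 (nearest) or
// 0.9999999 (toward zero).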
45377 let r =
45378 _mm512_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
45379 let e = _mm512_set1_ps(0.99999994);
45380 assert_eq_m512(r, e);
45381 let r = _mm512_fnmadd_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
45382 let e = _mm512_set1_ps(0.9999999);
45383 assert_eq_m512(r, e);
45384 }
45385
45386 #[simd_test(enable = "avx512f")]
45387 unsafe fn test_mm512_mask_fnmadd_round_ps() {
45388 let a = _mm512_set1_ps(0.00000007);
45389 let b = _mm512_set1_ps(1.);
45390 let c = _mm512_set1_ps(1.);
45391 let r = _mm512_mask_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
45392 a, 0, b, c,
45393 );
45394 assert_eq_m512(r, a);
45395 let r = _mm512_mask_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
45396 a,
45397 0b00000000_11111111,
45398 b,
45399 c,
45400 );
45401 let e = _mm512_setr_ps(
45402 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
45403 0.99999994, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007,
45404 0.00000007, 0.00000007,
45405 );
45406 assert_eq_m512(r, e);
45407 }
45408
45409 #[simd_test(enable = "avx512f")]
45410 unsafe fn test_mm512_maskz_fnmadd_round_ps() {
45411 let a = _mm512_set1_ps(0.00000007);
45412 let b = _mm512_set1_ps(1.);
45413 let c = _mm512_set1_ps(1.);
45414 let r = _mm512_maskz_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
45415 0, a, b, c,
45416 );
45417 assert_eq_m512(r, _mm512_setzero_ps());
45418 let r = _mm512_maskz_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
45419 0b00000000_11111111,
45420 a,
45421 b,
45422 c,
45423 );
45424 let e = _mm512_setr_ps(
45425 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
45426 0.99999994, 0., 0., 0., 0., 0., 0., 0., 0.,
45427 );
45428 assert_eq_m512(r, e);
45429 }
45430
45431 #[simd_test(enable = "avx512f")]
45432 unsafe fn test_mm512_mask3_fnmadd_round_ps() {
45433 let a = _mm512_set1_ps(0.00000007);
45434 let b = _mm512_set1_ps(1.);
45435 let c = _mm512_set1_ps(1.);
45436 let r = _mm512_mask3_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
45437 a, b, c, 0,
45438 );
45439 assert_eq_m512(r, c);
45440 let r = _mm512_mask3_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
45441 a,
45442 b,
45443 c,
45444 0b00000000_11111111,
45445 );
45446 let e = _mm512_setr_ps(
45447 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
45448 0.99999994, 1., 1., 1., 1., 1., 1., 1., 1.,
45449 );
45450 assert_eq_m512(r, e);
45451 }
45452
45453 #[simd_test(enable = "avx512f")]
45454 unsafe fn test_mm512_fnmsub_round_ps() {
45455 let a = _mm512_set1_ps(0.00000007);
45456 let b = _mm512_set1_ps(1.);
45457 let c = _mm512_set1_ps(-1.);
45458 let r =
45459 _mm512_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
45460 let e = _mm512_set1_ps(0.99999994);
45461 assert_eq_m512(r, e);
45462 let r = _mm512_fnmsub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
45463 let e = _mm512_set1_ps(0.9999999);
45464 assert_eq_m512(r, e);
45465 }
45466
45467 #[simd_test(enable = "avx512f")]
45468 unsafe fn test_mm512_mask_fnmsub_round_ps() {
45469 let a = _mm512_set1_ps(0.00000007);
45470 let b = _mm512_set1_ps(1.);
45471 let c = _mm512_set1_ps(-1.);
45472 let r = _mm512_mask_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
45473 a, 0, b, c,
45474 );
45475 assert_eq_m512(r, a);
45476 let r = _mm512_mask_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
45477 a,
45478 0b00000000_11111111,
45479 b,
45480 c,
45481 );
45482 let e = _mm512_setr_ps(
45483 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
45484 0.99999994, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007,
45485 0.00000007, 0.00000007,
45486 );
45487 assert_eq_m512(r, e);
45488 }
45489
45490 #[simd_test(enable = "avx512f")]
45491 unsafe fn test_mm512_maskz_fnmsub_round_ps() {
45492 let a = _mm512_set1_ps(0.00000007);
45493 let b = _mm512_set1_ps(1.);
45494 let c = _mm512_set1_ps(-1.);
45495 let r = _mm512_maskz_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
45496 0, a, b, c,
45497 );
45498 assert_eq_m512(r, _mm512_setzero_ps());
45499 let r = _mm512_maskz_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
45500 0b00000000_11111111,
45501 a,
45502 b,
45503 c,
45504 );
45505 let e = _mm512_setr_ps(
45506 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
45507 0.99999994, 0., 0., 0., 0., 0., 0., 0., 0.,
45508 );
45509 assert_eq_m512(r, e);
45510 }
45511
45512 #[simd_test(enable = "avx512f")]
45513 unsafe fn test_mm512_mask3_fnmsub_round_ps() {
45514 let a = _mm512_set1_ps(0.00000007);
45515 let b = _mm512_set1_ps(1.);
45516 let c = _mm512_set1_ps(-1.);
45517 let r = _mm512_mask3_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
45518 a, b, c, 0,
45519 );
45520 assert_eq_m512(r, c);
45521 let r = _mm512_mask3_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
45522 a,
45523 b,
45524 c,
45525 0b00000000_11111111,
45526 );
45527 let e = _mm512_setr_ps(
45528 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
45529 0.99999994, -1., -1., -1., -1., -1., -1., -1., -1.,
45530 );
45531 assert_eq_m512(r, e);
45532 }
45533
45534 #[simd_test(enable = "avx512f")]
45535 unsafe fn test_mm512_max_round_ps() {
45536 let a = _mm512_setr_ps(
45537 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45538 );
45539 let b = _mm512_setr_ps(
45540 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
45541 );
45542 let r = _mm512_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, b);
45543 let e = _mm512_setr_ps(
45544 15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
45545 );
45546 assert_eq_m512(r, e);
45547 }
45548
45549 #[simd_test(enable = "avx512f")]
45550 unsafe fn test_mm512_mask_max_round_ps() {
45551 let a = _mm512_setr_ps(
45552 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45553 );
45554 let b = _mm512_setr_ps(
45555 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
45556 );
45557 let r = _mm512_mask_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
45558 assert_eq_m512(r, a);
45559 let r = _mm512_mask_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0b00000000_11111111, a, b);
45560 let e = _mm512_setr_ps(
45561 15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
45562 );
45563 assert_eq_m512(r, e);
45564 }
45565
45566 #[simd_test(enable = "avx512f")]
45567 unsafe fn test_mm512_maskz_max_round_ps() {
45568 let a = _mm512_setr_ps(
45569 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45570 );
45571 let b = _mm512_setr_ps(
45572 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
45573 );
45574 let r = _mm512_maskz_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
45575 assert_eq_m512(r, _mm512_setzero_ps());
45576 let r = _mm512_maskz_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(0b00000000_11111111, a, b);
45577 let e = _mm512_setr_ps(
45578 15., 14., 13., 12., 11., 10., 9., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
45579 );
45580 assert_eq_m512(r, e);
45581 }
45582
45583 #[simd_test(enable = "avx512f")]
45584 unsafe fn test_mm512_min_round_ps() {
45585 let a = _mm512_setr_ps(
45586 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45587 );
45588 let b = _mm512_setr_ps(
45589 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
45590 );
45591 let r = _mm512_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, b);
45592 let e = _mm512_setr_ps(
45593 0., 1., 2., 3., 4., 5., 6., 7., 7., 6., 5., 4., 3., 2., 1., 0.,
45594 );
45595 assert_eq_m512(r, e);
45596 }
45597
45598 #[simd_test(enable = "avx512f")]
45599 unsafe fn test_mm512_mask_min_round_ps() {
45600 let a = _mm512_setr_ps(
45601 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45602 );
45603 let b = _mm512_setr_ps(
45604 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
45605 );
45606 let r = _mm512_mask_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
45607 assert_eq_m512(r, a);
45608 let r = _mm512_mask_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0b00000000_11111111, a, b);
45609 let e = _mm512_setr_ps(
45610 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45611 );
45612 assert_eq_m512(r, e);
45613 }
45614
45615 #[simd_test(enable = "avx512f")]
45616 unsafe fn test_mm512_maskz_min_round_ps() {
45617 let a = _mm512_setr_ps(
45618 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45619 );
45620 let b = _mm512_setr_ps(
45621 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
45622 );
45623 let r = _mm512_maskz_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
45624 assert_eq_m512(r, _mm512_setzero_ps());
45625 let r = _mm512_maskz_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(0b00000000_11111111, a, b);
45626 let e = _mm512_setr_ps(
45627 0., 1., 2., 3., 4., 5., 6., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
45628 );
45629 assert_eq_m512(r, e);
45630 }
45631
45632 #[simd_test(enable = "avx512f")]
45633 unsafe fn test_mm512_getexp_round_ps() {
45634 let a = _mm512_set1_ps(3.);
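// getexp returns the unbiased exponent floor(log2(|a|)) as a float, so 3.0 maps to 1.0.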
45635 let r = _mm512_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(a);
45636 let e = _mm512_set1_ps(1.);
45637 assert_eq_m512(r, e);
45638 }
45639
45640 #[simd_test(enable = "avx512f")]
45641 unsafe fn test_mm512_mask_getexp_round_ps() {
45642 let a = _mm512_set1_ps(3.);
45643 let r = _mm512_mask_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0, a);
45644 assert_eq_m512(r, a);
45645 let r = _mm512_mask_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111_00000000, a);
45646 let e = _mm512_setr_ps(
45647 3., 3., 3., 3., 3., 3., 3., 3., 1., 1., 1., 1., 1., 1., 1., 1.,
45648 );
45649 assert_eq_m512(r, e);
45650 }
45651
45652 #[simd_test(enable = "avx512f")]
45653 unsafe fn test_mm512_maskz_getexp_round_ps() {
45654 let a = _mm512_set1_ps(3.);
45655 let r = _mm512_maskz_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(0, a);
45656 assert_eq_m512(r, _mm512_setzero_ps());
45657 let r = _mm512_maskz_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(0b11111111_00000000, a);
45658 let e = _mm512_setr_ps(
45659 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
45660 );
45661 assert_eq_m512(r, e);
45662 }
45663
45664 #[simd_test(enable = "avx512f")]
45665 unsafe fn test_mm512_roundscale_round_ps() {
45666 let a = _mm512_set1_ps(1.1);
45667 let r = _mm512_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(a);
45668 let e = _mm512_set1_ps(1.0);
45669 assert_eq_m512(r, e);
45670 }
45671
45672 #[simd_test(enable = "avx512f")]
45673 unsafe fn test_mm512_mask_roundscale_round_ps() {
45674 let a = _mm512_set1_ps(1.1);
45675 let r = _mm512_mask_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(a, 0, a);
45676 let e = _mm512_set1_ps(1.1);
45677 assert_eq_m512(r, e);
45678 let r = _mm512_mask_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(
45679 a,
45680 0b11111111_11111111,
45681 a,
45682 );
45683 let e = _mm512_set1_ps(1.0);
45684 assert_eq_m512(r, e);
45685 }
45686
45687 #[simd_test(enable = "avx512f")]
45688 unsafe fn test_mm512_maskz_roundscale_round_ps() {
45689 let a = _mm512_set1_ps(1.1);
45690 let r = _mm512_maskz_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(0, a);
45691 assert_eq_m512(r, _mm512_setzero_ps());
45692 let r =
45693 _mm512_maskz_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(0b11111111_11111111, a);
45694 let e = _mm512_set1_ps(1.0);
45695 assert_eq_m512(r, e);
45696 }
45697
45698 #[simd_test(enable = "avx512f")]
45699 unsafe fn test_mm512_scalef_round_ps() {
45700 let a = _mm512_set1_ps(1.);
45701 let b = _mm512_set1_ps(3.);
45702 let r = _mm512_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
45703 let e = _mm512_set1_ps(8.);
45704 assert_eq_m512(r, e);
45705 }
45706
45707 #[simd_test(enable = "avx512f")]
45708 unsafe fn test_mm512_mask_scalef_round_ps() {
45709 let a = _mm512_set1_ps(1.);
45710 let b = _mm512_set1_ps(3.);
45711 let r = _mm512_mask_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
45712 a, 0, a, b,
45713 );
45714 assert_eq_m512(r, a);
45715 let r = _mm512_mask_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
45716 a,
45717 0b11111111_00000000,
45718 a,
45719 b,
45720 );
45721 let e = _mm512_set_ps(
45722 8., 8., 8., 8., 8., 8., 8., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
45723 );
45724 assert_eq_m512(r, e);
45725 }
45726
45727 #[simd_test(enable = "avx512f")]
45728 unsafe fn test_mm512_maskz_scalef_round_ps() {
45729 let a = _mm512_set1_ps(1.);
45730 let b = _mm512_set1_ps(3.);
45731 let r = _mm512_maskz_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
45732 0, a, b,
45733 );
45734 assert_eq_m512(r, _mm512_setzero_ps());
45735 let r = _mm512_maskz_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
45736 0b11111111_00000000,
45737 a,
45738 b,
45739 );
45740 let e = _mm512_set_ps(
45741 8., 8., 8., 8., 8., 8., 8., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
45742 );
45743 assert_eq_m512(r, e);
45744 }
45745
45746 #[simd_test(enable = "avx512f")]
45747 unsafe fn test_mm512_fixupimm_round_ps() {
45748 let a = _mm512_set1_ps(f32::NAN);
45749 let b = _mm512_set1_ps(f32::MAX);
45750 let c = _mm512_set1_epi32(i32::MAX);
45751 let r = _mm512_fixupimm_round_ps::<5, _MM_FROUND_CUR_DIRECTION>(a, b, c);
45752 let e = _mm512_set1_ps(0.0);
45753 assert_eq_m512(r, e);
45754 }
45755
45756 #[simd_test(enable = "avx512f")]
45757 unsafe fn test_mm512_mask_fixupimm_round_ps() {
45758 #[rustfmt::skip]
45759 let a = _mm512_set_ps(
45760 f32::NAN, f32::NAN, f32::NAN, f32::NAN,
45761 f32::NAN, f32::NAN, f32::NAN, f32::NAN,
45762 1., 1., 1., 1.,
45763 1., 1., 1., 1.,
45764 );
45765 let b = _mm512_set1_ps(f32::MAX);
45766 let c = _mm512_set1_epi32(i32::MAX);
45767 let r = _mm512_mask_fixupimm_round_ps::<5, _MM_FROUND_CUR_DIRECTION>(
45768 a,
45769 0b11111111_00000000,
45770 b,
45771 c,
45772 );
45773 let e = _mm512_set_ps(
45774 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
45775 );
45776 assert_eq_m512(r, e);
45777 }
45778
45779 #[simd_test(enable = "avx512f")]
45780 unsafe fn test_mm512_maskz_fixupimm_round_ps() {
45781 #[rustfmt::skip]
45782 let a = _mm512_set_ps(
45783 f32::NAN, f32::NAN, f32::NAN, f32::NAN,
45784 f32::NAN, f32::NAN, f32::NAN, f32::NAN,
45785 1., 1., 1., 1.,
45786 1., 1., 1., 1.,
45787 );
45788 let b = _mm512_set1_ps(f32::MAX);
45789 let c = _mm512_set1_epi32(i32::MAX);
45790 let r = _mm512_maskz_fixupimm_round_ps::<5, _MM_FROUND_CUR_DIRECTION>(
45791 0b11111111_00000000,
45792 a,
45793 b,
45794 c,
45795 );
45796 let e = _mm512_set_ps(
45797 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
45798 );
45799 assert_eq_m512(r, e);
45800 }
45801
45802 #[simd_test(enable = "avx512f")]
45803 unsafe fn test_mm512_getmant_round_ps() {
45804 let a = _mm512_set1_ps(10.);
45805 let r = _mm512_getmant_round_ps::<
45806 _MM_MANT_NORM_1_2,
45807 _MM_MANT_SIGN_SRC,
45808 _MM_FROUND_CUR_DIRECTION,
45809 >(a);
45810 let e = _mm512_set1_ps(1.25);
45811 assert_eq_m512(r, e);
45812 }
45813
45814 #[simd_test(enable = "avx512f")]
45815 unsafe fn test_mm512_mask_getmant_round_ps() {
45816 let a = _mm512_set1_ps(10.);
45817 let r = _mm512_mask_getmant_round_ps::<
45818 _MM_MANT_NORM_1_2,
45819 _MM_MANT_SIGN_SRC,
45820 _MM_FROUND_CUR_DIRECTION,
45821 >(a, 0, a);
45822 assert_eq_m512(r, a);
45823 let r = _mm512_mask_getmant_round_ps::<
45824 _MM_MANT_NORM_1_2,
45825 _MM_MANT_SIGN_SRC,
45826 _MM_FROUND_CUR_DIRECTION,
45827 >(a, 0b11111111_00000000, a);
45828 let e = _mm512_setr_ps(
45829 10., 10., 10., 10., 10., 10., 10., 10., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
45830 );
45831 assert_eq_m512(r, e);
45832 }
45833
45834 #[simd_test(enable = "avx512f")]
45835 unsafe fn test_mm512_maskz_getmant_round_ps() {
45836 let a = _mm512_set1_ps(10.);
45837 let r = _mm512_maskz_getmant_round_ps::<
45838 _MM_MANT_NORM_1_2,
45839 _MM_MANT_SIGN_SRC,
45840 _MM_FROUND_CUR_DIRECTION,
45841 >(0, a);
45842 assert_eq_m512(r, _mm512_setzero_ps());
45843 let r = _mm512_maskz_getmant_round_ps::<
45844 _MM_MANT_NORM_1_2,
45845 _MM_MANT_SIGN_SRC,
45846 _MM_FROUND_CUR_DIRECTION,
45847 >(0b11111111_00000000, a);
45848 let e = _mm512_setr_ps(
45849 0., 0., 0., 0., 0., 0., 0., 0., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
45850 );
45851 assert_eq_m512(r, e);
45852 }
45853
45854 #[simd_test(enable = "avx512f")]
45855 unsafe fn test_mm512_cvtps_epi32() {
45856 let a = _mm512_setr_ps(
45857 0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
45858 );
45859 let r = _mm512_cvtps_epi32(a);
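// The conversion uses the current rounding mode (round-to-nearest-even by default),
// so -3.5 becomes -4 and 9.5 becomes 10.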
45860 let e = _mm512_setr_epi32(0, -1, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
45861 assert_eq_m512i(r, e);
45862 }
45863
45864 #[simd_test(enable = "avx512f")]
45865 unsafe fn test_mm512_mask_cvtps_epi32() {
45866 let a = _mm512_setr_ps(
45867 0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
45868 );
45869 let src = _mm512_set1_epi32(0);
45870 let r = _mm512_mask_cvtps_epi32(src, 0, a);
45871 assert_eq_m512i(r, src);
45872 let r = _mm512_mask_cvtps_epi32(src, 0b00000000_11111111, a);
45873 let e = _mm512_setr_epi32(0, -1, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
45874 assert_eq_m512i(r, e);
45875 }
45876
45877 #[simd_test(enable = "avx512f")]
45878 unsafe fn test_mm512_maskz_cvtps_epi32() {
45879 let a = _mm512_setr_ps(
45880 0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
45881 );
45882 let r = _mm512_maskz_cvtps_epi32(0, a);
45883 assert_eq_m512i(r, _mm512_setzero_si512());
45884 let r = _mm512_maskz_cvtps_epi32(0b00000000_11111111, a);
45885 let e = _mm512_setr_epi32(0, -1, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
45886 assert_eq_m512i(r, e);
45887 }
45888
45889 #[simd_test(enable = "avx512f,avx512vl")]
45890 unsafe fn test_mm256_mask_cvtps_epi32() {
45891 let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
45892 let src = _mm256_set1_epi32(0);
45893 let r = _mm256_mask_cvtps_epi32(src, 0, a);
45894 assert_eq_m256i(r, src);
45895 let r = _mm256_mask_cvtps_epi32(src, 0b11111111, a);
45896 let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
45897 assert_eq_m256i(r, e);
45898 }
45899
45900 #[simd_test(enable = "avx512f,avx512vl")]
45901 unsafe fn test_mm256_maskz_cvtps_epi32() {
45902 let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
45903 let r = _mm256_maskz_cvtps_epi32(0, a);
45904 assert_eq_m256i(r, _mm256_setzero_si256());
45905 let r = _mm256_maskz_cvtps_epi32(0b11111111, a);
45906 let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
45907 assert_eq_m256i(r, e);
45908 }
45909
45910 #[simd_test(enable = "avx512f,avx512vl")]
45911 unsafe fn test_mm_mask_cvtps_epi32() {
45912 let a = _mm_set_ps(12., 13.5, 14., 15.5);
45913 let src = _mm_set1_epi32(0);
45914 let r = _mm_mask_cvtps_epi32(src, 0, a);
45915 assert_eq_m128i(r, src);
45916 let r = _mm_mask_cvtps_epi32(src, 0b00001111, a);
45917 let e = _mm_set_epi32(12, 14, 14, 16);
45918 assert_eq_m128i(r, e);
45919 }
45920
45921 #[simd_test(enable = "avx512f,avx512vl")]
45922 unsafe fn test_mm_maskz_cvtps_epi32() {
45923 let a = _mm_set_ps(12., 13.5, 14., 15.5);
45924 let r = _mm_maskz_cvtps_epi32(0, a);
45925 assert_eq_m128i(r, _mm_setzero_si128());
45926 let r = _mm_maskz_cvtps_epi32(0b00001111, a);
45927 let e = _mm_set_epi32(12, 14, 14, 16);
45928 assert_eq_m128i(r, e);
45929 }
45930
45931 #[simd_test(enable = "avx512f")]
45932 unsafe fn test_mm512_cvtps_epu32() {
45933 let a = _mm512_setr_ps(
45934 0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
45935 );
45936 let r = _mm512_cvtps_epu32(a);
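// Negative inputs are out of range for an unsigned conversion and produce the integer
// indefinite value 0xFFFF_FFFF, which compares as -1 through the signed helpers below.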
45937 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 10, 10, 12, 12, 14, 14, 16);
45938 assert_eq_m512i(r, e);
45939 }
45940
45941 #[simd_test(enable = "avx512f")]
45942 unsafe fn test_mm512_mask_cvtps_epu32() {
45943 let a = _mm512_setr_ps(
45944 0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
45945 );
45946 let src = _mm512_set1_epi32(0);
45947 let r = _mm512_mask_cvtps_epu32(src, 0, a);
45948 assert_eq_m512i(r, src);
45949 let r = _mm512_mask_cvtps_epu32(src, 0b00000000_11111111, a);
45950 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
45951 assert_eq_m512i(r, e);
45952 }
45953
45954 #[simd_test(enable = "avx512f")]
45955 unsafe fn test_mm512_maskz_cvtps_epu32() {
45956 let a = _mm512_setr_ps(
45957 0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
45958 );
45959 let r = _mm512_maskz_cvtps_epu32(0, a);
45960 assert_eq_m512i(r, _mm512_setzero_si512());
45961 let r = _mm512_maskz_cvtps_epu32(0b00000000_11111111, a);
45962 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
45963 assert_eq_m512i(r, e);
45964 }
45965
45966 #[simd_test(enable = "avx512f,avx512vl")]
45967 unsafe fn test_mm256_cvtps_epu32() {
45968 let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
45969 let r = _mm256_cvtps_epu32(a);
45970 let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
45971 assert_eq_m256i(r, e);
45972 }
45973
45974 #[simd_test(enable = "avx512f,avx512vl")]
45975 unsafe fn test_mm256_mask_cvtps_epu32() {
45976 let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
45977 let src = _mm256_set1_epi32(0);
45978 let r = _mm256_mask_cvtps_epu32(src, 0, a);
45979 assert_eq_m256i(r, src);
45980 let r = _mm256_mask_cvtps_epu32(src, 0b11111111, a);
45981 let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
45982 assert_eq_m256i(r, e);
45983 }
45984
45985 #[simd_test(enable = "avx512f,avx512vl")]
45986 unsafe fn test_mm256_maskz_cvtps_epu32() {
45987 let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
45988 let r = _mm256_maskz_cvtps_epu32(0, a);
45989 assert_eq_m256i(r, _mm256_setzero_si256());
45990 let r = _mm256_maskz_cvtps_epu32(0b11111111, a);
45991 let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
45992 assert_eq_m256i(r, e);
45993 }
45994
45995 #[simd_test(enable = "avx512f,avx512vl")]
45996 unsafe fn test_mm_cvtps_epu32() {
45997 let a = _mm_set_ps(12., 13.5, 14., 15.5);
45998 let r = _mm_cvtps_epu32(a);
45999 let e = _mm_set_epi32(12, 14, 14, 16);
46000 assert_eq_m128i(r, e);
46001 }
46002
46003 #[simd_test(enable = "avx512f,avx512vl")]
46004 unsafe fn test_mm_mask_cvtps_epu32() {
46005 let a = _mm_set_ps(12., 13.5, 14., 15.5);
46006 let src = _mm_set1_epi32(0);
46007 let r = _mm_mask_cvtps_epu32(src, 0, a);
46008 assert_eq_m128i(r, src);
46009 let r = _mm_mask_cvtps_epu32(src, 0b00001111, a);
46010 let e = _mm_set_epi32(12, 14, 14, 16);
46011 assert_eq_m128i(r, e);
46012 }
46013
46014 #[simd_test(enable = "avx512f,avx512vl")]
46015 unsafe fn test_mm_maskz_cvtps_epu32() {
46016 let a = _mm_set_ps(12., 13.5, 14., 15.5);
46017 let r = _mm_maskz_cvtps_epu32(0, a);
46018 assert_eq_m128i(r, _mm_setzero_si128());
46019 let r = _mm_maskz_cvtps_epu32(0b00001111, a);
46020 let e = _mm_set_epi32(12, 14, 14, 16);
46021 assert_eq_m128i(r, e);
46022 }
46023
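    // The widening conversions below (epi8/epu8/epi16/epu16 to epi32) use small
    // non-negative inputs, so sign extension and zero extension produce the same
    // 32-bit values; the interesting part is the masking, not the extension itself.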
46024 #[simd_test(enable = "avx512f")]
46025 unsafe fn test_mm512_cvtepi8_epi32() {
46026 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
46027 let r = _mm512_cvtepi8_epi32(a);
46028 let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
46029 assert_eq_m512i(r, e);
46030 }
46031
46032 #[simd_test(enable = "avx512f")]
46033 unsafe fn test_mm512_mask_cvtepi8_epi32() {
46034 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
46035 let src = _mm512_set1_epi32(-1);
46036 let r = _mm512_mask_cvtepi8_epi32(src, 0, a);
46037 assert_eq_m512i(r, src);
46038 let r = _mm512_mask_cvtepi8_epi32(src, 0b00000000_11111111, a);
46039 let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
46040 assert_eq_m512i(r, e);
46041 }
46042
46043 #[simd_test(enable = "avx512f")]
46044 unsafe fn test_mm512_maskz_cvtepi8_epi32() {
46045 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
46046 let r = _mm512_maskz_cvtepi8_epi32(0, a);
46047 assert_eq_m512i(r, _mm512_setzero_si512());
46048 let r = _mm512_maskz_cvtepi8_epi32(0b00000000_11111111, a);
46049 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
46050 assert_eq_m512i(r, e);
46051 }
46052
46053 #[simd_test(enable = "avx512f,avx512vl")]
46054 unsafe fn test_mm256_mask_cvtepi8_epi32() {
46055 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
46056 let src = _mm256_set1_epi32(-1);
46057 let r = _mm256_mask_cvtepi8_epi32(src, 0, a);
46058 assert_eq_m256i(r, src);
46059 let r = _mm256_mask_cvtepi8_epi32(src, 0b11111111, a);
46060 let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
46061 assert_eq_m256i(r, e);
46062 }
46063
46064 #[simd_test(enable = "avx512f,avx512vl")]
46065 unsafe fn test_mm256_maskz_cvtepi8_epi32() {
46066 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
46067 let r = _mm256_maskz_cvtepi8_epi32(0, a);
46068 assert_eq_m256i(r, _mm256_setzero_si256());
46069 let r = _mm256_maskz_cvtepi8_epi32(0b11111111, a);
46070 let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
46071 assert_eq_m256i(r, e);
46072 }
46073
46074 #[simd_test(enable = "avx512f,avx512vl")]
46075 unsafe fn test_mm_mask_cvtepi8_epi32() {
46076 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
46077 let src = _mm_set1_epi32(-1);
46078 let r = _mm_mask_cvtepi8_epi32(src, 0, a);
46079 assert_eq_m128i(r, src);
46080 let r = _mm_mask_cvtepi8_epi32(src, 0b00001111, a);
46081 let e = _mm_set_epi32(12, 13, 14, 15);
46082 assert_eq_m128i(r, e);
46083 }
46084
46085 #[simd_test(enable = "avx512f,avx512vl")]
46086 unsafe fn test_mm_maskz_cvtepi8_epi32() {
46087 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
46088 let r = _mm_maskz_cvtepi8_epi32(0, a);
46089 assert_eq_m128i(r, _mm_setzero_si128());
46090 let r = _mm_maskz_cvtepi8_epi32(0b00001111, a);
46091 let e = _mm_set_epi32(12, 13, 14, 15);
46092 assert_eq_m128i(r, e);
46093 }
46094
46095 #[simd_test(enable = "avx512f")]
46096 unsafe fn test_mm512_cvtepu8_epi32() {
46097 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
46098 let r = _mm512_cvtepu8_epi32(a);
46099 let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
46100 assert_eq_m512i(r, e);
46101 }
46102
46103 #[simd_test(enable = "avx512f")]
46104 unsafe fn test_mm512_mask_cvtepu8_epi32() {
46105 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
46106 let src = _mm512_set1_epi32(-1);
46107 let r = _mm512_mask_cvtepu8_epi32(src, 0, a);
46108 assert_eq_m512i(r, src);
46109 let r = _mm512_mask_cvtepu8_epi32(src, 0b00000000_11111111, a);
46110 let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
46111 assert_eq_m512i(r, e);
46112 }
46113
46114 #[simd_test(enable = "avx512f")]
46115 unsafe fn test_mm512_maskz_cvtepu8_epi32() {
46116 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
46117 let r = _mm512_maskz_cvtepu8_epi32(0, a);
46118 assert_eq_m512i(r, _mm512_setzero_si512());
46119 let r = _mm512_maskz_cvtepu8_epi32(0b00000000_11111111, a);
46120 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
46121 assert_eq_m512i(r, e);
46122 }
46123
46124 #[simd_test(enable = "avx512f,avx512vl")]
46125 unsafe fn test_mm256_mask_cvtepu8_epi32() {
46126 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
46127 let src = _mm256_set1_epi32(-1);
46128 let r = _mm256_mask_cvtepu8_epi32(src, 0, a);
46129 assert_eq_m256i(r, src);
46130 let r = _mm256_mask_cvtepu8_epi32(src, 0b11111111, a);
46131 let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
46132 assert_eq_m256i(r, e);
46133 }
46134
46135 #[simd_test(enable = "avx512f,avx512vl")]
46136 unsafe fn test_mm256_maskz_cvtepu8_epi32() {
46137 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
46138 let r = _mm256_maskz_cvtepu8_epi32(0, a);
46139 assert_eq_m256i(r, _mm256_setzero_si256());
46140 let r = _mm256_maskz_cvtepu8_epi32(0b11111111, a);
46141 let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
46142 assert_eq_m256i(r, e);
46143 }
46144
46145 #[simd_test(enable = "avx512f,avx512vl")]
46146 unsafe fn test_mm_mask_cvtepu8_epi32() {
46147 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
46148 let src = _mm_set1_epi32(-1);
46149 let r = _mm_mask_cvtepu8_epi32(src, 0, a);
46150 assert_eq_m128i(r, src);
46151 let r = _mm_mask_cvtepu8_epi32(src, 0b00001111, a);
46152 let e = _mm_set_epi32(12, 13, 14, 15);
46153 assert_eq_m128i(r, e);
46154 }
46155
46156 #[simd_test(enable = "avx512f,avx512vl")]
46157 unsafe fn test_mm_maskz_cvtepu8_epi32() {
46158 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
46159 let r = _mm_maskz_cvtepu8_epi32(0, a);
46160 assert_eq_m128i(r, _mm_setzero_si128());
46161 let r = _mm_maskz_cvtepu8_epi32(0b00001111, a);
46162 let e = _mm_set_epi32(12, 13, 14, 15);
46163 assert_eq_m128i(r, e);
46164 }
46165
46166 #[simd_test(enable = "avx512f")]
46167 unsafe fn test_mm512_cvtepi16_epi32() {
46168 let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
46169 let r = _mm512_cvtepi16_epi32(a);
46170 let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
46171 assert_eq_m512i(r, e);
46172 }
46173
46174 #[simd_test(enable = "avx512f")]
46175 unsafe fn test_mm512_mask_cvtepi16_epi32() {
46176 let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
46177 let src = _mm512_set1_epi32(-1);
46178 let r = _mm512_mask_cvtepi16_epi32(src, 0, a);
46179 assert_eq_m512i(r, src);
46180 let r = _mm512_mask_cvtepi16_epi32(src, 0b00000000_11111111, a);
46181 let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
46182 assert_eq_m512i(r, e);
46183 }
46184
46185 #[simd_test(enable = "avx512f")]
46186 unsafe fn test_mm512_maskz_cvtepi16_epi32() {
46187 let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
46188 let r = _mm512_maskz_cvtepi16_epi32(0, a);
46189 assert_eq_m512i(r, _mm512_setzero_si512());
46190 let r = _mm512_maskz_cvtepi16_epi32(0b00000000_11111111, a);
46191 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
46192 assert_eq_m512i(r, e);
46193 }
46194
46195 #[simd_test(enable = "avx512f,avx512vl")]
46196 unsafe fn test_mm256_mask_cvtepi16_epi32() {
46197 let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
46198 let src = _mm256_set1_epi32(-1);
46199 let r = _mm256_mask_cvtepi16_epi32(src, 0, a);
46200 assert_eq_m256i(r, src);
46201 let r = _mm256_mask_cvtepi16_epi32(src, 0b11111111, a);
46202 let e = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
46203 assert_eq_m256i(r, e);
46204 }
46205
46206 #[simd_test(enable = "avx512f,avx512vl")]
46207 unsafe fn test_mm256_maskz_cvtepi16_epi32() {
46208 let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
46209 let r = _mm256_maskz_cvtepi16_epi32(0, a);
46210 assert_eq_m256i(r, _mm256_setzero_si256());
46211 let r = _mm256_maskz_cvtepi16_epi32(0b11111111, a);
46212 let e = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
46213 assert_eq_m256i(r, e);
46214 }
46215
46216 #[simd_test(enable = "avx512f,avx512vl")]
46217 unsafe fn test_mm_mask_cvtepi16_epi32() {
46218 let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
46219 let src = _mm_set1_epi32(-1);
46220 let r = _mm_mask_cvtepi16_epi32(src, 0, a);
46221 assert_eq_m128i(r, src);
46222 let r = _mm_mask_cvtepi16_epi32(src, 0b00001111, a);
46223 let e = _mm_set_epi32(4, 5, 6, 7);
46224 assert_eq_m128i(r, e);
46225 }
46226
46227 #[simd_test(enable = "avx512f,avx512vl")]
46228 unsafe fn test_mm_maskz_cvtepi16_epi32() {
46229 let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
46230 let r = _mm_maskz_cvtepi16_epi32(0, a);
46231 assert_eq_m128i(r, _mm_setzero_si128());
46232 let r = _mm_maskz_cvtepi16_epi32(0b00001111, a);
46233 let e = _mm_set_epi32(4, 5, 6, 7);
46234 assert_eq_m128i(r, e);
46235 }
46236
46237 #[simd_test(enable = "avx512f")]
46238 unsafe fn test_mm512_cvtepu16_epi32() {
46239 let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
46240 let r = _mm512_cvtepu16_epi32(a);
46241 let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
46242 assert_eq_m512i(r, e);
46243 }
46244
46245 #[simd_test(enable = "avx512f")]
46246 unsafe fn test_mm512_mask_cvtepu16_epi32() {
46247 let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
46248 let src = _mm512_set1_epi32(-1);
46249 let r = _mm512_mask_cvtepu16_epi32(src, 0, a);
46250 assert_eq_m512i(r, src);
46251 let r = _mm512_mask_cvtepu16_epi32(src, 0b00000000_11111111, a);
46252 let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
46253 assert_eq_m512i(r, e);
46254 }
46255
46256 #[simd_test(enable = "avx512f")]
46257 unsafe fn test_mm512_maskz_cvtepu16_epi32() {
46258 let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
46259 let r = _mm512_maskz_cvtepu16_epi32(0, a);
46260 assert_eq_m512i(r, _mm512_setzero_si512());
46261 let r = _mm512_maskz_cvtepu16_epi32(0b00000000_11111111, a);
46262 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
46263 assert_eq_m512i(r, e);
46264 }
46265
46266 #[simd_test(enable = "avx512f,avx512vl")]
46267 unsafe fn test_mm256_mask_cvtepu16_epi32() {
46268 let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
46269 let src = _mm256_set1_epi32(-1);
46270 let r = _mm256_mask_cvtepu16_epi32(src, 0, a);
46271 assert_eq_m256i(r, src);
46272 let r = _mm256_mask_cvtepu16_epi32(src, 0b11111111, a);
46273 let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
46274 assert_eq_m256i(r, e);
46275 }
46276
46277 #[simd_test(enable = "avx512f,avx512vl")]
46278 unsafe fn test_mm256_maskz_cvtepu16_epi32() {
46279 let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
46280 let r = _mm256_maskz_cvtepu16_epi32(0, a);
46281 assert_eq_m256i(r, _mm256_setzero_si256());
46282 let r = _mm256_maskz_cvtepu16_epi32(0b11111111, a);
46283 let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
46284 assert_eq_m256i(r, e);
46285 }
46286
46287 #[simd_test(enable = "avx512f,avx512vl")]
46288 unsafe fn test_mm_mask_cvtepu16_epi32() {
46289 let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
46290 let src = _mm_set1_epi32(-1);
46291 let r = _mm_mask_cvtepu16_epi32(src, 0, a);
46292 assert_eq_m128i(r, src);
46293 let r = _mm_mask_cvtepu16_epi32(src, 0b00001111, a);
46294 let e = _mm_set_epi32(12, 13, 14, 15);
46295 assert_eq_m128i(r, e);
46296 }
46297
46298 #[simd_test(enable = "avx512f,avx512vl")]
46299 unsafe fn test_mm_maskz_cvtepu16_epi32() {
46300 let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
46301 let r = _mm_maskz_cvtepu16_epi32(0, a);
46302 assert_eq_m128i(r, _mm_setzero_si128());
46303 let r = _mm_maskz_cvtepu16_epi32(0b00001111, a);
46304 let e = _mm_set_epi32(12, 13, 14, 15);
46305 assert_eq_m128i(r, e);
46306 }
46307
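    // Integer-to-float conversions: every input below is small enough to be exact
    // in f32, and since the values are non-negative the signed (`cvtepi32_ps`) and
    // unsigned (`cvtepu32_ps`) conversions are expected to agree.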
46308 #[simd_test(enable = "avx512f")]
46309 unsafe fn test_mm512_cvtepi32_ps() {
46310 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
46311 let r = _mm512_cvtepi32_ps(a);
46312 let e = _mm512_set_ps(
46313 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
46314 );
46315 assert_eq_m512(r, e);
46316 }
46317
46318 #[simd_test(enable = "avx512f")]
46319 unsafe fn test_mm512_mask_cvtepi32_ps() {
46320 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
46321 let src = _mm512_set1_ps(-1.);
46322 let r = _mm512_mask_cvtepi32_ps(src, 0, a);
46323 assert_eq_m512(r, src);
46324 let r = _mm512_mask_cvtepi32_ps(src, 0b00000000_11111111, a);
46325 let e = _mm512_set_ps(
46326 -1., -1., -1., -1., -1., -1., -1., -1., 8., 9., 10., 11., 12., 13., 14., 15.,
46327 );
46328 assert_eq_m512(r, e);
46329 }
46330
46331 #[simd_test(enable = "avx512f")]
46332 unsafe fn test_mm512_maskz_cvtepi32_ps() {
46333 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
46334 let r = _mm512_maskz_cvtepi32_ps(0, a);
46335 assert_eq_m512(r, _mm512_setzero_ps());
46336 let r = _mm512_maskz_cvtepi32_ps(0b00000000_11111111, a);
46337 let e = _mm512_set_ps(
46338 0., 0., 0., 0., 0., 0., 0., 0., 8., 9., 10., 11., 12., 13., 14., 15.,
46339 );
46340 assert_eq_m512(r, e);
46341 }
46342
46343 #[simd_test(enable = "avx512f,avx512vl")]
46344 unsafe fn test_mm256_mask_cvtepi32_ps() {
46345 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
46346 let src = _mm256_set1_ps(-1.);
46347 let r = _mm256_mask_cvtepi32_ps(src, 0, a);
46348 assert_eq_m256(r, src);
46349 let r = _mm256_mask_cvtepi32_ps(src, 0b11111111, a);
46350 let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
46351 assert_eq_m256(r, e);
46352 }
46353
46354 #[simd_test(enable = "avx512f,avx512vl")]
46355 unsafe fn test_mm256_maskz_cvtepi32_ps() {
46356 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
46357 let r = _mm256_maskz_cvtepi32_ps(0, a);
46358 assert_eq_m256(r, _mm256_setzero_ps());
46359 let r = _mm256_maskz_cvtepi32_ps(0b11111111, a);
46360 let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
46361 assert_eq_m256(r, e);
46362 }
46363
46364 #[simd_test(enable = "avx512f,avx512vl")]
46365 unsafe fn test_mm_mask_cvtepi32_ps() {
46366 let a = _mm_set_epi32(1, 2, 3, 4);
46367 let src = _mm_set1_ps(-1.);
46368 let r = _mm_mask_cvtepi32_ps(src, 0, a);
46369 assert_eq_m128(r, src);
46370 let r = _mm_mask_cvtepi32_ps(src, 0b00001111, a);
46371 let e = _mm_set_ps(1., 2., 3., 4.);
46372 assert_eq_m128(r, e);
46373 }
46374
46375 #[simd_test(enable = "avx512f,avx512vl")]
46376 unsafe fn test_mm_maskz_cvtepi32_ps() {
46377 let a = _mm_set_epi32(1, 2, 3, 4);
46378 let r = _mm_maskz_cvtepi32_ps(0, a);
46379 assert_eq_m128(r, _mm_setzero_ps());
46380 let r = _mm_maskz_cvtepi32_ps(0b00001111, a);
46381 let e = _mm_set_ps(1., 2., 3., 4.);
46382 assert_eq_m128(r, e);
46383 }
46384
46385 #[simd_test(enable = "avx512f")]
46386 unsafe fn test_mm512_cvtepu32_ps() {
46387 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
46388 let r = _mm512_cvtepu32_ps(a);
46389 let e = _mm512_set_ps(
46390 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
46391 );
46392 assert_eq_m512(r, e);
46393 }
46394
46395 #[simd_test(enable = "avx512f")]
46396 unsafe fn test_mm512_mask_cvtepu32_ps() {
46397 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
46398 let src = _mm512_set1_ps(-1.);
46399 let r = _mm512_mask_cvtepu32_ps(src, 0, a);
46400 assert_eq_m512(r, src);
46401 let r = _mm512_mask_cvtepu32_ps(src, 0b00000000_11111111, a);
46402 let e = _mm512_set_ps(
46403 -1., -1., -1., -1., -1., -1., -1., -1., 8., 9., 10., 11., 12., 13., 14., 15.,
46404 );
46405 assert_eq_m512(r, e);
46406 }
46407
46408 #[simd_test(enable = "avx512f")]
46409 unsafe fn test_mm512_maskz_cvtepu32_ps() {
46410 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
46411 let r = _mm512_maskz_cvtepu32_ps(0, a);
46412 assert_eq_m512(r, _mm512_setzero_ps());
46413 let r = _mm512_maskz_cvtepu32_ps(0b00000000_11111111, a);
46414 let e = _mm512_set_ps(
46415 0., 0., 0., 0., 0., 0., 0., 0., 8., 9., 10., 11., 12., 13., 14., 15.,
46416 );
46417 assert_eq_m512(r, e);
46418 }
46419
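    // The `cvtepi32_epi16`/`cvtepi32_epi8` down-conversions (VPMOVDW/VPMOVDB)
    // truncate each element; when the result is narrower than the destination
    // register, only the low part is written and the remaining bytes are zero,
    // which is why the 128-bit expectations below are zero-padded at the top.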
46420 #[simd_test(enable = "avx512f")]
46421 unsafe fn test_mm512_cvtepi32_epi16() {
46422 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
46423 let r = _mm512_cvtepi32_epi16(a);
46424 let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
46425 assert_eq_m256i(r, e);
46426 }
46427
46428 #[simd_test(enable = "avx512f")]
46429 unsafe fn test_mm512_mask_cvtepi32_epi16() {
46430 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
46431 let src = _mm256_set1_epi16(-1);
46432 let r = _mm512_mask_cvtepi32_epi16(src, 0, a);
46433 assert_eq_m256i(r, src);
46434 let r = _mm512_mask_cvtepi32_epi16(src, 0b00000000_11111111, a);
46435 let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
46436 assert_eq_m256i(r, e);
46437 }
46438
46439 #[simd_test(enable = "avx512f")]
46440 unsafe fn test_mm512_maskz_cvtepi32_epi16() {
46441 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
46442 let r = _mm512_maskz_cvtepi32_epi16(0, a);
46443 assert_eq_m256i(r, _mm256_setzero_si256());
46444 let r = _mm512_maskz_cvtepi32_epi16(0b00000000_11111111, a);
46445 let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
46446 assert_eq_m256i(r, e);
46447 }
46448
46449 #[simd_test(enable = "avx512f,avx512vl")]
46450 unsafe fn test_mm256_cvtepi32_epi16() {
46451 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
46452 let r = _mm256_cvtepi32_epi16(a);
46453 let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
46454 assert_eq_m128i(r, e);
46455 }
46456
46457 #[simd_test(enable = "avx512f,avx512vl")]
46458 unsafe fn test_mm256_mask_cvtepi32_epi16() {
46459 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
46460 let src = _mm_set1_epi16(-1);
46461 let r = _mm256_mask_cvtepi32_epi16(src, 0, a);
46462 assert_eq_m128i(r, src);
46463 let r = _mm256_mask_cvtepi32_epi16(src, 0b11111111, a);
46464 let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
46465 assert_eq_m128i(r, e);
46466 }
46467
46468 #[simd_test(enable = "avx512f,avx512vl")]
46469 unsafe fn test_mm256_maskz_cvtepi32_epi16() {
46470 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
46471 let r = _mm256_maskz_cvtepi32_epi16(0, a);
46472 assert_eq_m128i(r, _mm_setzero_si128());
46473 let r = _mm256_maskz_cvtepi32_epi16(0b11111111, a);
46474 let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
46475 assert_eq_m128i(r, e);
46476 }
46477
46478 #[simd_test(enable = "avx512f,avx512vl")]
46479 unsafe fn test_mm_cvtepi32_epi16() {
46480 let a = _mm_set_epi32(4, 5, 6, 7);
46481 let r = _mm_cvtepi32_epi16(a);
46482 let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
46483 assert_eq_m128i(r, e);
46484 }
46485
46486 #[simd_test(enable = "avx512f,avx512vl")]
46487 unsafe fn test_mm_mask_cvtepi32_epi16() {
46488 let a = _mm_set_epi32(4, 5, 6, 7);
46489 let src = _mm_set1_epi16(0);
46490 let r = _mm_mask_cvtepi32_epi16(src, 0, a);
46491 assert_eq_m128i(r, src);
46492 let r = _mm_mask_cvtepi32_epi16(src, 0b00001111, a);
46493 let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
46494 assert_eq_m128i(r, e);
46495 }
46496
46497 #[simd_test(enable = "avx512f,avx512vl")]
46498 unsafe fn test_mm_maskz_cvtepi32_epi16() {
46499 let a = _mm_set_epi32(4, 5, 6, 7);
46500 let r = _mm_maskz_cvtepi32_epi16(0, a);
46501 assert_eq_m128i(r, _mm_setzero_si128());
46502 let r = _mm_maskz_cvtepi32_epi16(0b00001111, a);
46503 let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
46504 assert_eq_m128i(r, e);
46505 }
46506
46507 #[simd_test(enable = "avx512f")]
46508 unsafe fn test_mm512_cvtepi32_epi8() {
46509 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
46510 let r = _mm512_cvtepi32_epi8(a);
46511 let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
46512 assert_eq_m128i(r, e);
46513 }
46514
46515 #[simd_test(enable = "avx512f")]
46516 unsafe fn test_mm512_mask_cvtepi32_epi8() {
46517 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
46518 let src = _mm_set1_epi8(-1);
46519 let r = _mm512_mask_cvtepi32_epi8(src, 0, a);
46520 assert_eq_m128i(r, src);
46521 let r = _mm512_mask_cvtepi32_epi8(src, 0b00000000_11111111, a);
46522 let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
46523 assert_eq_m128i(r, e);
46524 }
46525
46526 #[simd_test(enable = "avx512f")]
46527 unsafe fn test_mm512_maskz_cvtepi32_epi8() {
46528 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
46529 let r = _mm512_maskz_cvtepi32_epi8(0, a);
46530 assert_eq_m128i(r, _mm_setzero_si128());
46531 let r = _mm512_maskz_cvtepi32_epi8(0b00000000_11111111, a);
46532 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
46533 assert_eq_m128i(r, e);
46534 }
46535
46536 #[simd_test(enable = "avx512f,avx512vl")]
46537 unsafe fn test_mm256_cvtepi32_epi8() {
46538 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
46539 let r = _mm256_cvtepi32_epi8(a);
46540 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7);
46541 assert_eq_m128i(r, e);
46542 }
46543
46544 #[simd_test(enable = "avx512f,avx512vl")]
46545 unsafe fn test_mm256_mask_cvtepi32_epi8() {
46546 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
46547 let src = _mm_set1_epi8(0);
46548 let r = _mm256_mask_cvtepi32_epi8(src, 0, a);
46549 assert_eq_m128i(r, src);
46550 let r = _mm256_mask_cvtepi32_epi8(src, 0b11111111, a);
46551 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7);
46552 assert_eq_m128i(r, e);
46553 }
46554
46555 #[simd_test(enable = "avx512f,avx512vl")]
46556 unsafe fn test_mm256_maskz_cvtepi32_epi8() {
46557 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
46558 let r = _mm256_maskz_cvtepi32_epi8(0, a);
46559 assert_eq_m128i(r, _mm_setzero_si128());
46560 let r = _mm256_maskz_cvtepi32_epi8(0b11111111, a);
46561 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7);
46562 assert_eq_m128i(r, e);
46563 }
46564
46565 #[simd_test(enable = "avx512f,avx512vl")]
46566 unsafe fn test_mm_cvtepi32_epi8() {
46567 let a = _mm_set_epi32(4, 5, 6, 7);
46568 let r = _mm_cvtepi32_epi8(a);
46569 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, 7);
46570 assert_eq_m128i(r, e);
46571 }
46572
46573 #[simd_test(enable = "avx512f,avx512vl")]
46574 unsafe fn test_mm_mask_cvtepi32_epi8() {
46575 let a = _mm_set_epi32(4, 5, 6, 7);
46576 let src = _mm_set1_epi8(0);
46577 let r = _mm_mask_cvtepi32_epi8(src, 0, a);
46578 assert_eq_m128i(r, src);
46579 let r = _mm_mask_cvtepi32_epi8(src, 0b00001111, a);
46580 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, 7);
46581 assert_eq_m128i(r, e);
46582 }
46583
46584 #[simd_test(enable = "avx512f,avx512vl")]
46585 unsafe fn test_mm_maskz_cvtepi32_epi8() {
46586 let a = _mm_set_epi32(4, 5, 6, 7);
46587 let r = _mm_maskz_cvtepi32_epi8(0, a);
46588 assert_eq_m128i(r, _mm_setzero_si128());
46589 let r = _mm_maskz_cvtepi32_epi8(0b00001111, a);
46590 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, 7);
46591 assert_eq_m128i(r, e);
46592 }
46593
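    // `cvtsepi32_*` performs signed saturation: i32::MIN and i32::MAX clamp to the
    // MIN/MAX of the narrower element type instead of being truncated.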
46594 #[simd_test(enable = "avx512f")]
46595 unsafe fn test_mm512_cvtsepi32_epi16() {
46596 #[rustfmt::skip]
46597 let a = _mm512_set_epi32(
46598 0, 1, 2, 3,
46599 4, 5, 6, 7,
46600 8, 9, 10, 11,
46601 12, 13, i32::MIN, i32::MAX,
46602 );
46603 let r = _mm512_cvtsepi32_epi16(a);
46604 #[rustfmt::skip]
46605 let e = _mm256_set_epi16(
46606 0, 1, 2, 3,
46607 4, 5, 6, 7,
46608 8, 9, 10, 11,
46609 12, 13, i16::MIN, i16::MAX,
46610 );
46611 assert_eq_m256i(r, e);
46612 }
46613
46614 #[simd_test(enable = "avx512f")]
46615 unsafe fn test_mm512_mask_cvtsepi32_epi16() {
46616 #[rustfmt::skip]
46617 let a = _mm512_set_epi32(
46618 0, 1, 2, 3,
46619 4, 5, 6, 7,
46620 8, 9, 10, 11,
46621 12, 13, i32::MIN, i32::MAX,
46622 );
46623 let src = _mm256_set1_epi16(-1);
46624 let r = _mm512_mask_cvtsepi32_epi16(src, 0, a);
46625 assert_eq_m256i(r, src);
46626 let r = _mm512_mask_cvtsepi32_epi16(src, 0b00000000_11111111, a);
46627 #[rustfmt::skip]
46628 let e = _mm256_set_epi16(
46629 -1, -1, -1, -1,
46630 -1, -1, -1, -1,
46631 8, 9, 10, 11,
46632 12, 13, i16::MIN, i16::MAX,
46633 );
46634 assert_eq_m256i(r, e);
46635 }
46636
46637 #[simd_test(enable = "avx512f")]
46638 unsafe fn test_mm512_maskz_cvtsepi32_epi16() {
46639 #[rustfmt::skip]
46640 let a = _mm512_set_epi32(
46641 0, 1, 2, 3,
46642 4, 5, 6, 7,
46643 8, 9, 10, 11,
46644 12, 13, i32::MIN, i32::MAX,
46645 );
46646 let r = _mm512_maskz_cvtsepi32_epi16(0, a);
46647 assert_eq_m256i(r, _mm256_setzero_si256());
46648 let r = _mm512_maskz_cvtsepi32_epi16(0b00000000_11111111, a);
46649 #[rustfmt::skip]
46650 let e = _mm256_set_epi16(
46651 0, 0, 0, 0,
46652 0, 0, 0, 0,
46653 8, 9, 10, 11,
46654 12, 13, i16::MIN, i16::MAX,
46655 );
46656 assert_eq_m256i(r, e);
46657 }
46658
46659 #[simd_test(enable = "avx512f,avx512vl")]
46660 unsafe fn test_mm256_cvtsepi32_epi16() {
46661 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
46662 let r = _mm256_cvtsepi32_epi16(a);
46663 let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
46664 assert_eq_m128i(r, e);
46665 }
46666
46667 #[simd_test(enable = "avx512f,avx512vl")]
46668 unsafe fn test_mm256_mask_cvtsepi32_epi16() {
46669 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
46670 let src = _mm_set1_epi16(-1);
46671 let r = _mm256_mask_cvtsepi32_epi16(src, 0, a);
46672 assert_eq_m128i(r, src);
46673 let r = _mm256_mask_cvtsepi32_epi16(src, 0b11111111, a);
46674 let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
46675 assert_eq_m128i(r, e);
46676 }
46677
46678 #[simd_test(enable = "avx512f,avx512vl")]
46679 unsafe fn test_mm256_maskz_cvtsepi32_epi16() {
46680 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
46681 let r = _mm256_maskz_cvtsepi32_epi16(0, a);
46682 assert_eq_m128i(r, _mm_setzero_si128());
46683 let r = _mm256_maskz_cvtsepi32_epi16(0b11111111, a);
46684 let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
46685 assert_eq_m128i(r, e);
46686 }
46687
46688 #[simd_test(enable = "avx512f,avx512vl")]
46689 unsafe fn test_mm_cvtsepi32_epi16() {
46690 let a = _mm_set_epi32(4, 5, 6, 7);
46691 let r = _mm_cvtsepi32_epi16(a);
46692 let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
46693 assert_eq_m128i(r, e);
46694 }
46695
46696 #[simd_test(enable = "avx512f,avx512vl")]
46697 unsafe fn test_mm_mask_cvtsepi32_epi16() {
46698 let a = _mm_set_epi32(4, 5, 6, 7);
46699 let src = _mm_set1_epi16(0);
46700 let r = _mm_mask_cvtsepi32_epi16(src, 0, a);
46701 assert_eq_m128i(r, src);
46702        let r = _mm_mask_cvtsepi32_epi16(src, 0b00001111, a);
46703 let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
46704 assert_eq_m128i(r, e);
46705 }
46706
46707 #[simd_test(enable = "avx512f,avx512vl")]
46708 unsafe fn test_mm_maskz_cvtsepi32_epi16() {
46709 let a = _mm_set_epi32(4, 5, 6, 7);
46710 let r = _mm_maskz_cvtsepi32_epi16(0, a);
46711 assert_eq_m128i(r, _mm_setzero_si128());
46712        let r = _mm_maskz_cvtsepi32_epi16(0b00001111, a);
46713 let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
46714 assert_eq_m128i(r, e);
46715 }
46716
46717 #[simd_test(enable = "avx512f")]
46718 unsafe fn test_mm512_cvtsepi32_epi8() {
46719 #[rustfmt::skip]
46720 let a = _mm512_set_epi32(
46721 0, 1, 2, 3,
46722 4, 5, 6, 7,
46723 8, 9, 10, 11,
46724 12, 13, i32::MIN, i32::MAX,
46725 );
46726 let r = _mm512_cvtsepi32_epi8(a);
46727 #[rustfmt::skip]
46728 let e = _mm_set_epi8(
46729 0, 1, 2, 3,
46730 4, 5, 6, 7,
46731 8, 9, 10, 11,
46732 12, 13, i8::MIN, i8::MAX,
46733 );
46734 assert_eq_m128i(r, e);
46735 }
46736
46737 #[simd_test(enable = "avx512f")]
46738 unsafe fn test_mm512_mask_cvtsepi32_epi8() {
46739 #[rustfmt::skip]
46740 let a = _mm512_set_epi32(
46741 0, 1, 2, 3,
46742 4, 5, 6, 7,
46743 8, 9, 10, 11,
46744 12, 13, i32::MIN, i32::MAX,
46745 );
46746 let src = _mm_set1_epi8(-1);
46747 let r = _mm512_mask_cvtsepi32_epi8(src, 0, a);
46748 assert_eq_m128i(r, src);
46749 let r = _mm512_mask_cvtsepi32_epi8(src, 0b00000000_11111111, a);
46750 #[rustfmt::skip]
46751 let e = _mm_set_epi8(
46752 -1, -1, -1, -1,
46753 -1, -1, -1, -1,
46754 8, 9, 10, 11,
46755 12, 13, i8::MIN, i8::MAX,
46756 );
46757 assert_eq_m128i(r, e);
46758 }
46759
46760 #[simd_test(enable = "avx512f")]
46761 unsafe fn test_mm512_maskz_cvtsepi32_epi8() {
46762 #[rustfmt::skip]
46763 let a = _mm512_set_epi32(
46764 0, 1, 2, 3,
46765 4, 5, 6, 7,
46766 8, 9, 10, 11,
46767 12, 13, i32::MIN, i32::MAX,
46768 );
46769 let r = _mm512_maskz_cvtsepi32_epi8(0, a);
46770 assert_eq_m128i(r, _mm_setzero_si128());
46771 let r = _mm512_maskz_cvtsepi32_epi8(0b00000000_11111111, a);
46772 #[rustfmt::skip]
46773 let e = _mm_set_epi8(
46774 0, 0, 0, 0,
46775 0, 0, 0, 0,
46776 8, 9, 10, 11,
46777 12, 13, i8::MIN, i8::MAX,
46778 );
46779 assert_eq_m128i(r, e);
46780 }
46781
46782 #[simd_test(enable = "avx512f,avx512vl")]
46783 unsafe fn test_mm256_cvtsepi32_epi8() {
46784 let a = _mm256_set_epi32(9, 10, 11, 12, 13, 14, 15, 16);
46785 let r = _mm256_cvtsepi32_epi8(a);
46786 #[rustfmt::skip]
46787 let e = _mm_set_epi8(
46788 0, 0, 0, 0,
46789 0, 0, 0, 0,
46790 9, 10, 11, 12,
46791 13, 14, 15, 16,
46792 );
46793 assert_eq_m128i(r, e);
46794 }
46795
46796 #[simd_test(enable = "avx512f,avx512vl")]
46797 unsafe fn test_mm256_mask_cvtsepi32_epi8() {
46798 let a = _mm256_set_epi32(9, 10, 11, 12, 13, 14, 15, 16);
46799 let src = _mm_set1_epi8(0);
46800 let r = _mm256_mask_cvtsepi32_epi8(src, 0, a);
46801 assert_eq_m128i(r, src);
46802 let r = _mm256_mask_cvtsepi32_epi8(src, 0b11111111, a);
46803 #[rustfmt::skip]
46804 let e = _mm_set_epi8(
46805 0, 0, 0, 0,
46806 0, 0, 0, 0,
46807 9, 10, 11, 12,
46808 13, 14, 15, 16,
46809 );
46810 assert_eq_m128i(r, e);
46811 }
46812
46813 #[simd_test(enable = "avx512f,avx512vl")]
46814 unsafe fn test_mm256_maskz_cvtsepi32_epi8() {
46815 let a = _mm256_set_epi32(9, 10, 11, 12, 13, 14, 15, 16);
46816 let r = _mm256_maskz_cvtsepi32_epi8(0, a);
46817 assert_eq_m128i(r, _mm_setzero_si128());
46818 let r = _mm256_maskz_cvtsepi32_epi8(0b11111111, a);
46819 #[rustfmt::skip]
46820 let e = _mm_set_epi8(
46821 0, 0, 0, 0,
46822 0, 0, 0, 0,
46823 9, 10, 11, 12,
46824 13, 14, 15, 16,
46825 );
46826 assert_eq_m128i(r, e);
46827 }
46828
46829 #[simd_test(enable = "avx512f,avx512vl")]
46830 unsafe fn test_mm_cvtsepi32_epi8() {
46831 let a = _mm_set_epi32(13, 14, 15, 16);
46832 let r = _mm_cvtsepi32_epi8(a);
46833 #[rustfmt::skip]
46834 let e = _mm_set_epi8(
46835 0, 0, 0, 0,
46836 0, 0, 0, 0,
46837 0, 0, 0, 0,
46838 13, 14, 15, 16,
46839 );
46840 assert_eq_m128i(r, e);
46841 }
46842
46843 #[simd_test(enable = "avx512f,avx512vl")]
46844 unsafe fn test_mm_mask_cvtsepi32_epi8() {
46845 let a = _mm_set_epi32(13, 14, 15, 16);
46846 let src = _mm_set1_epi8(0);
46847 let r = _mm_mask_cvtsepi32_epi8(src, 0, a);
46848 assert_eq_m128i(r, src);
46849 let r = _mm_mask_cvtsepi32_epi8(src, 0b00001111, a);
46850 #[rustfmt::skip]
46851 let e = _mm_set_epi8(
46852 0, 0, 0, 0,
46853 0, 0, 0, 0,
46854 0, 0, 0, 0,
46855 13, 14, 15, 16,
46856 );
46857 assert_eq_m128i(r, e);
46858 }
46859
46860 #[simd_test(enable = "avx512f,avx512vl")]
46861 unsafe fn test_mm_maskz_cvtsepi32_epi8() {
46862 let a = _mm_set_epi32(13, 14, 15, 16);
46863 let r = _mm_maskz_cvtsepi32_epi8(0, a);
46864 assert_eq_m128i(r, _mm_setzero_si128());
46865 let r = _mm_maskz_cvtsepi32_epi8(0b00001111, a);
46866 #[rustfmt::skip]
46867 let e = _mm_set_epi8(
46868 0, 0, 0, 0,
46869 0, 0, 0, 0,
46870 0, 0, 0, 0,
46871 13, 14, 15, 16,
46872 );
46873 assert_eq_m128i(r, e);
46874 }
46875
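    // `cvtusepi32_*` performs unsigned saturation: the source is treated as
    // unsigned, so i32::MIN (0x8000_0000) exceeds u16::MAX/u8::MAX and saturates
    // to all ones, which prints as -1 in the signed expectation vectors below.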
46876 #[simd_test(enable = "avx512f")]
46877 unsafe fn test_mm512_cvtusepi32_epi16() {
46878 #[rustfmt::skip]
46879 let a = _mm512_set_epi32(
46880 0, 1, 2, 3,
46881 4, 5, 6, 7,
46882 8, 9, 10, 11,
46883 12, 13, i32::MIN, i32::MIN,
46884 );
46885 let r = _mm512_cvtusepi32_epi16(a);
46886 let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -1, -1);
46887 assert_eq_m256i(r, e);
46888 }
46889
46890 #[simd_test(enable = "avx512f")]
46891 unsafe fn test_mm512_mask_cvtusepi32_epi16() {
46892 #[rustfmt::skip]
46893 let a = _mm512_set_epi32(
46894 0, 1, 2, 3,
46895 4, 5, 6, 7,
46896 8, 9, 10, 11,
46897 12, 13, i32::MIN, i32::MIN,
46898 );
46899 let src = _mm256_set1_epi16(-1);
46900 let r = _mm512_mask_cvtusepi32_epi16(src, 0, a);
46901 assert_eq_m256i(r, src);
46902 let r = _mm512_mask_cvtusepi32_epi16(src, 0b00000000_11111111, a);
46903 let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, -1, -1);
46904 assert_eq_m256i(r, e);
46905 }
46906
46907 #[simd_test(enable = "avx512f")]
46908 unsafe fn test_mm512_maskz_cvtusepi32_epi16() {
46909 #[rustfmt::skip]
46910 let a = _mm512_set_epi32(
46911 0, 1, 2, 3,
46912 4, 5, 6, 7,
46913 8, 9, 10, 11,
46914 12, 13, i32::MIN, i32::MIN,
46915 );
46916 let r = _mm512_maskz_cvtusepi32_epi16(0, a);
46917 assert_eq_m256i(r, _mm256_setzero_si256());
46918 let r = _mm512_maskz_cvtusepi32_epi16(0b00000000_11111111, a);
46919 let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, -1, -1);
46920 assert_eq_m256i(r, e);
46921 }
46922
46923 #[simd_test(enable = "avx512f,avx512vl")]
46924 unsafe fn test_mm256_cvtusepi32_epi16() {
46925 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
46926 let r = _mm256_cvtusepi32_epi16(a);
46927 let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
46928 assert_eq_m128i(r, e);
46929 }
46930
46931 #[simd_test(enable = "avx512f,avx512vl")]
46932 unsafe fn test_mm256_mask_cvtusepi32_epi16() {
46933 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
46934 let src = _mm_set1_epi16(0);
46935 let r = _mm256_mask_cvtusepi32_epi16(src, 0, a);
46936 assert_eq_m128i(r, src);
46937 let r = _mm256_mask_cvtusepi32_epi16(src, 0b11111111, a);
46938 let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
46939 assert_eq_m128i(r, e);
46940 }
46941
46942 #[simd_test(enable = "avx512f,avx512vl")]
46943 unsafe fn test_mm256_maskz_cvtusepi32_epi16() {
46944 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
46945 let r = _mm256_maskz_cvtusepi32_epi16(0, a);
46946 assert_eq_m128i(r, _mm_setzero_si128());
46947 let r = _mm256_maskz_cvtusepi32_epi16(0b11111111, a);
46948 let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
46949 assert_eq_m128i(r, e);
46950 }
46951
46952 #[simd_test(enable = "avx512f,avx512vl")]
46953 unsafe fn test_mm_cvtusepi32_epi16() {
46954 let a = _mm_set_epi32(5, 6, 7, 8);
46955 let r = _mm_cvtusepi32_epi16(a);
46956 let e = _mm_set_epi16(0, 0, 0, 0, 5, 6, 7, 8);
46957 assert_eq_m128i(r, e);
46958 }
46959
46960 #[simd_test(enable = "avx512f,avx512vl")]
46961 unsafe fn test_mm_mask_cvtusepi32_epi16() {
46962 let a = _mm_set_epi32(5, 6, 7, 8);
46963 let src = _mm_set1_epi16(0);
46964 let r = _mm_mask_cvtusepi32_epi16(src, 0, a);
46965 assert_eq_m128i(r, src);
46966 let r = _mm_mask_cvtusepi32_epi16(src, 0b00001111, a);
46967 let e = _mm_set_epi16(0, 0, 0, 0, 5, 6, 7, 8);
46968 assert_eq_m128i(r, e);
46969 }
46970
46971 #[simd_test(enable = "avx512f,avx512vl")]
46972 unsafe fn test_mm_maskz_cvtusepi32_epi16() {
46973 let a = _mm_set_epi32(5, 6, 7, 8);
46974 let r = _mm_maskz_cvtusepi32_epi16(0, a);
46975 assert_eq_m128i(r, _mm_setzero_si128());
46976 let r = _mm_maskz_cvtusepi32_epi16(0b00001111, a);
46977 let e = _mm_set_epi16(0, 0, 0, 0, 5, 6, 7, 8);
46978 assert_eq_m128i(r, e);
46979 }
46980
46981 #[simd_test(enable = "avx512f")]
46982 unsafe fn test_mm512_cvtusepi32_epi8() {
46983 #[rustfmt::skip]
46984 let a = _mm512_set_epi32(
46985 0, 1, 2, 3,
46986 4, 5, 6, 7,
46987 8, 9, 10, 11,
46988 12, 13, i32::MIN, i32::MIN,
46989 );
46990 let r = _mm512_cvtusepi32_epi8(a);
46991 let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -1, -1);
46992 assert_eq_m128i(r, e);
46993 }
46994
46995 #[simd_test(enable = "avx512f")]
46996 unsafe fn test_mm512_mask_cvtusepi32_epi8() {
46997 #[rustfmt::skip]
46998 let a = _mm512_set_epi32(
46999 0, 1, 2, 3,
47000 4, 5, 6, 7,
47001 8, 9, 10, 11,
47002 12, 13, i32::MIN, i32::MIN,
47003 );
47004 let src = _mm_set1_epi8(-1);
47005 let r = _mm512_mask_cvtusepi32_epi8(src, 0, a);
47006 assert_eq_m128i(r, src);
47007 let r = _mm512_mask_cvtusepi32_epi8(src, 0b00000000_11111111, a);
47008 let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, -1, -1);
47009 assert_eq_m128i(r, e);
47010 }
47011
47012 #[simd_test(enable = "avx512f")]
47013 unsafe fn test_mm512_maskz_cvtusepi32_epi8() {
47014 #[rustfmt::skip]
47015 let a = _mm512_set_epi32(
47016 0, 1, 2, 3,
47017 4, 5, 6, 7,
47018 8, 9, 10, 11,
47019 12, 13, i32::MIN, i32::MIN,
47020 );
47021 let r = _mm512_maskz_cvtusepi32_epi8(0, a);
47022 assert_eq_m128i(r, _mm_setzero_si128());
47023 let r = _mm512_maskz_cvtusepi32_epi8(0b00000000_11111111, a);
47024 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, -1, -1);
47025 assert_eq_m128i(r, e);
47026 }
47027
47028 #[simd_test(enable = "avx512f,avx512vl")]
47029 unsafe fn test_mm256_cvtusepi32_epi8() {
47030 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, i32::MAX);
47031 let r = _mm256_cvtusepi32_epi8(a);
47032 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, u8::MAX as i8);
47033 assert_eq_m128i(r, e);
47034 }
47035
47036 #[simd_test(enable = "avx512f,avx512vl")]
47037 unsafe fn test_mm256_mask_cvtusepi32_epi8() {
47038 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, i32::MAX);
47039 let src = _mm_set1_epi8(0);
47040 let r = _mm256_mask_cvtusepi32_epi8(src, 0, a);
47041 assert_eq_m128i(r, src);
47042 let r = _mm256_mask_cvtusepi32_epi8(src, 0b11111111, a);
47043 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, u8::MAX as i8);
47044 assert_eq_m128i(r, e);
47045 }
47046
47047 #[simd_test(enable = "avx512f,avx512vl")]
47048 unsafe fn test_mm256_maskz_cvtusepi32_epi8() {
47049 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, i32::MAX);
47050 let r = _mm256_maskz_cvtusepi32_epi8(0, a);
47051 assert_eq_m128i(r, _mm_setzero_si128());
47052 let r = _mm256_maskz_cvtusepi32_epi8(0b11111111, a);
47053 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, u8::MAX as i8);
47054 assert_eq_m128i(r, e);
47055 }
47056
47057 #[simd_test(enable = "avx512f,avx512vl")]
47058 unsafe fn test_mm_cvtusepi32_epi8() {
47059 let a = _mm_set_epi32(5, 6, 7, i32::MAX);
47060 let r = _mm_cvtusepi32_epi8(a);
47061 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 7, u8::MAX as i8);
47062 assert_eq_m128i(r, e);
47063 }
47064
47065 #[simd_test(enable = "avx512f,avx512vl")]
47066 unsafe fn test_mm_mask_cvtusepi32_epi8() {
47067 let a = _mm_set_epi32(5, 6, 7, i32::MAX);
47068 let src = _mm_set1_epi8(0);
47069 let r = _mm_mask_cvtusepi32_epi8(src, 0, a);
47070 assert_eq_m128i(r, src);
47071 let r = _mm_mask_cvtusepi32_epi8(src, 0b00001111, a);
47072 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 7, u8::MAX as i8);
47073 assert_eq_m128i(r, e);
47074 }
47075
47076 #[simd_test(enable = "avx512f,avx512vl")]
47077 unsafe fn test_mm_maskz_cvtusepi32_epi8() {
47078 let a = _mm_set_epi32(5, 6, 7, i32::MAX);
47079 let r = _mm_maskz_cvtusepi32_epi8(0, a);
47080 assert_eq_m128i(r, _mm_setzero_si128());
47081 let r = _mm_maskz_cvtusepi32_epi8(0b00001111, a);
47082 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 7, u8::MAX as i8);
47083 assert_eq_m128i(r, e);
47084 }
47085
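    // The `cvt_roundps_*` tests pick the rounding mode through the const generic:
    // _MM_FROUND_TO_NEAREST_INT rounds halfway cases to even (-1.5 -> -2, 9.5 -> 10)
    // while _MM_FROUND_TO_NEG_INF floors (9.5 -> 9); _MM_FROUND_NO_EXC suppresses
    // floating-point exceptions in both cases.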
47086 #[simd_test(enable = "avx512f")]
47087 unsafe fn test_mm512_cvt_roundps_epi32() {
47088 let a = _mm512_setr_ps(
47089 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47090 );
47091 let r = _mm512_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
47092 let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
47093 assert_eq_m512i(r, e);
47094 let r = _mm512_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a);
47095 let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 9, 10, 11, 12, 13, 14, 15);
47096 assert_eq_m512i(r, e);
47097 }
47098
47099 #[simd_test(enable = "avx512f")]
47100 unsafe fn test_mm512_mask_cvt_roundps_epi32() {
47101 let a = _mm512_setr_ps(
47102 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47103 );
47104 let src = _mm512_set1_epi32(0);
47105 let r = _mm512_mask_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47106 src, 0, a,
47107 );
47108 assert_eq_m512i(r, src);
47109 let r = _mm512_mask_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47110 src,
47111 0b00000000_11111111,
47112 a,
47113 );
47114 let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
47115 assert_eq_m512i(r, e);
47116 }
47117
47118 #[simd_test(enable = "avx512f")]
47119 unsafe fn test_mm512_maskz_cvt_roundps_epi32() {
47120 let a = _mm512_setr_ps(
47121 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47122 );
47123 let r = _mm512_maskz_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47124 0, a,
47125 );
47126 assert_eq_m512i(r, _mm512_setzero_si512());
47127 let r = _mm512_maskz_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47128 0b00000000_11111111,
47129 a,
47130 );
47131 let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
47132 assert_eq_m512i(r, e);
47133 }
47134
47135 #[simd_test(enable = "avx512f")]
47136 unsafe fn test_mm512_cvt_roundps_epu32() {
47137 let a = _mm512_setr_ps(
47138 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47139 );
47140 let r = _mm512_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
47141 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 10, 10, 12, 12, 14, 14, 16);
47142 assert_eq_m512i(r, e);
47143 let r = _mm512_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a);
47144 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 9, 10, 11, 12, 13, 14, 15);
47145 assert_eq_m512i(r, e);
47146 }
47147
47148 #[simd_test(enable = "avx512f")]
47149 unsafe fn test_mm512_mask_cvt_roundps_epu32() {
47150 let a = _mm512_setr_ps(
47151 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47152 );
47153 let src = _mm512_set1_epi32(0);
47154 let r = _mm512_mask_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47155 src, 0, a,
47156 );
47157 assert_eq_m512i(r, src);
47158 let r = _mm512_mask_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47159 src,
47160 0b00000000_11111111,
47161 a,
47162 );
47163 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
47164 assert_eq_m512i(r, e);
47165 }
47166
47167 #[simd_test(enable = "avx512f")]
47168 unsafe fn test_mm512_maskz_cvt_roundps_epu32() {
47169 let a = _mm512_setr_ps(
47170 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47171 );
47172 let r = _mm512_maskz_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47173 0, a,
47174 );
47175 assert_eq_m512i(r, _mm512_setzero_si512());
47176 let r = _mm512_maskz_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47177 0b00000000_11111111,
47178 a,
47179 );
47180 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
47181 assert_eq_m512i(r, e);
47182 }
47183
47184 #[simd_test(enable = "avx512f")]
47185 unsafe fn test_mm512_cvt_roundepi32_ps() {
47186 let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
47187 let r = _mm512_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
47188 let e = _mm512_setr_ps(
47189 0., -2., 2., -4., 4., -6., 6., -8., 8., 10., 10., 12., 12., 14., 14., 16.,
47190 );
47191 assert_eq_m512(r, e);
47192 }
47193
47194 #[simd_test(enable = "avx512f")]
47195 unsafe fn test_mm512_mask_cvt_roundepi32_ps() {
47196 let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
47197 let src = _mm512_set1_ps(0.);
47198 let r = _mm512_mask_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47199 src, 0, a,
47200 );
47201 assert_eq_m512(r, src);
47202 let r = _mm512_mask_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47203 src,
47204 0b00000000_11111111,
47205 a,
47206 );
47207 let e = _mm512_setr_ps(
47208 0., -2., 2., -4., 4., -6., 6., -8., 0., 0., 0., 0., 0., 0., 0., 0.,
47209 );
47210 assert_eq_m512(r, e);
47211 }
47212
47213 #[simd_test(enable = "avx512f")]
47214 unsafe fn test_mm512_maskz_cvt_roundepi32_ps() {
47215 let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
47216 let r = _mm512_maskz_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47217 0, a,
47218 );
47219 assert_eq_m512(r, _mm512_setzero_ps());
47220 let r = _mm512_maskz_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47221 0b00000000_11111111,
47222 a,
47223 );
47224 let e = _mm512_setr_ps(
47225 0., -2., 2., -4., 4., -6., 6., -8., 0., 0., 0., 0., 0., 0., 0., 0.,
47226 );
47227 assert_eq_m512(r, e);
47228 }
47229
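    // `cvt_roundepu32_ps` reinterprets the inputs as unsigned: -2 is 4_294_967_294,
    // which is not exactly representable in f32 and rounds to 4_294_967_296.0
    // (2^32); the literal `4294967300.` parses to that same f32 value, so the
    // comparison below holds.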
47230 #[simd_test(enable = "avx512f")]
47231 unsafe fn test_mm512_cvt_roundepu32_ps() {
47232 let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
47233 let r = _mm512_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
47234 #[rustfmt::skip]
47235 let e = _mm512_setr_ps(
47236 0., 4294967300., 2., 4294967300.,
47237 4., 4294967300., 6., 4294967300.,
47238 8., 10., 10., 12.,
47239 12., 14., 14., 16.,
47240 );
47241 assert_eq_m512(r, e);
47242 }
47243
47244 #[simd_test(enable = "avx512f")]
47245 unsafe fn test_mm512_mask_cvt_roundepu32_ps() {
47246 let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
47247 let src = _mm512_set1_ps(0.);
47248 let r = _mm512_mask_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47249 src, 0, a,
47250 );
47251 assert_eq_m512(r, src);
47252 let r = _mm512_mask_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47253 src,
47254 0b00000000_11111111,
47255 a,
47256 );
47257 #[rustfmt::skip]
47258 let e = _mm512_setr_ps(
47259 0., 4294967300., 2., 4294967300.,
47260 4., 4294967300., 6., 4294967300.,
47261 0., 0., 0., 0.,
47262 0., 0., 0., 0.,
47263 );
47264 assert_eq_m512(r, e);
47265 }
47266
47267 #[simd_test(enable = "avx512f")]
47268 unsafe fn test_mm512_maskz_cvt_roundepu32_ps() {
47269 let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
47270 let r = _mm512_maskz_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47271 0, a,
47272 );
47273 assert_eq_m512(r, _mm512_setzero_ps());
47274 let r = _mm512_maskz_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47275 0b00000000_11111111,
47276 a,
47277 );
47278 #[rustfmt::skip]
47279 let e = _mm512_setr_ps(
47280 0., 4294967300., 2., 4294967300.,
47281 4., 4294967300., 6., 4294967300.,
47282 0., 0., 0., 0.,
47283 0., 0., 0., 0.,
47284 );
47285 assert_eq_m512(r, e);
47286 }
47287
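    // In the f32 -> f16 tests, 4323521613979991040 is 0x3C00_3C00_3C00_3C00:
    // 0x3C00 is the IEEE binary16 encoding of 1.0, so each 64-bit lane of the
    // expected __m256i holds four half-precision 1.0 values.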
47288 #[simd_test(enable = "avx512f")]
47289 unsafe fn test_mm512_cvt_roundps_ph() {
47290 let a = _mm512_set1_ps(1.);
47291 let r = _mm512_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(a);
47292 let e = _mm256_setr_epi64x(
47293 4323521613979991040,
47294 4323521613979991040,
47295 4323521613979991040,
47296 4323521613979991040,
47297 );
47298 assert_eq_m256i(r, e);
47299 }
47300
47301 #[simd_test(enable = "avx512f")]
47302 unsafe fn test_mm512_mask_cvt_roundps_ph() {
47303 let a = _mm512_set1_ps(1.);
47304 let src = _mm256_set1_epi16(0);
47305 let r = _mm512_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
47306 assert_eq_m256i(r, src);
47307 let r = _mm512_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
47308 let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
47309 assert_eq_m256i(r, e);
47310 }
47311
47312 #[simd_test(enable = "avx512f")]
47313 unsafe fn test_mm512_maskz_cvt_roundps_ph() {
47314 let a = _mm512_set1_ps(1.);
47315 let r = _mm512_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0, a);
47316 assert_eq_m256i(r, _mm256_setzero_si256());
47317 let r = _mm512_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
47318 let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
47319 assert_eq_m256i(r, e);
47320 }
47321
47322 #[simd_test(enable = "avx512f,avx512vl")]
47323 unsafe fn test_mm256_mask_cvt_roundps_ph() {
47324 let a = _mm256_set1_ps(1.);
47325 let src = _mm_set1_epi16(0);
47326 let r = _mm256_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
47327 assert_eq_m128i(r, src);
47328 let r = _mm256_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0b11111111, a);
47329 let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
47330 assert_eq_m128i(r, e);
47331 }
47332
47333 #[simd_test(enable = "avx512f,avx512vl")]
47334 unsafe fn test_mm256_maskz_cvt_roundps_ph() {
47335 let a = _mm256_set1_ps(1.);
47336 let r = _mm256_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0, a);
47337 assert_eq_m128i(r, _mm_setzero_si128());
47338 let r = _mm256_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0b11111111, a);
47339 let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
47340 assert_eq_m128i(r, e);
47341 }
47342
47343 #[simd_test(enable = "avx512f,avx512vl")]
47344 unsafe fn test_mm_mask_cvt_roundps_ph() {
47345 let a = _mm_set1_ps(1.);
47346 let src = _mm_set1_epi16(0);
47347 let r = _mm_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
47348 assert_eq_m128i(r, src);
47349 let r = _mm_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0b00001111, a);
47350 let e = _mm_setr_epi64x(4323521613979991040, 0);
47351 assert_eq_m128i(r, e);
47352 }
47353
47354 #[simd_test(enable = "avx512f,avx512vl")]
47355 unsafe fn test_mm_maskz_cvt_roundps_ph() {
47356 let a = _mm_set1_ps(1.);
47357 let r = _mm_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0, a);
47358 assert_eq_m128i(r, _mm_setzero_si128());
47359 let r = _mm_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0b00001111, a);
47360 let e = _mm_setr_epi64x(4323521613979991040, 0);
47361 assert_eq_m128i(r, e);
47362 }
47363
47364 #[simd_test(enable = "avx512f")]
47365 unsafe fn test_mm512_cvtps_ph() {
47366 let a = _mm512_set1_ps(1.);
47367 let r = _mm512_cvtps_ph::<_MM_FROUND_NO_EXC>(a);
47368 let e = _mm256_setr_epi64x(
47369 4323521613979991040,
47370 4323521613979991040,
47371 4323521613979991040,
47372 4323521613979991040,
47373 );
47374 assert_eq_m256i(r, e);
47375 }
47376
47377 #[simd_test(enable = "avx512f")]
47378 unsafe fn test_mm512_mask_cvtps_ph() {
47379 let a = _mm512_set1_ps(1.);
47380 let src = _mm256_set1_epi16(0);
47381 let r = _mm512_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
47382 assert_eq_m256i(r, src);
47383 let r = _mm512_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
47384 let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
47385 assert_eq_m256i(r, e);
47386 }
47387
47388 #[simd_test(enable = "avx512f")]
47389 unsafe fn test_mm512_maskz_cvtps_ph() {
47390 let a = _mm512_set1_ps(1.);
47391 let r = _mm512_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0, a);
47392 assert_eq_m256i(r, _mm256_setzero_si256());
47393 let r = _mm512_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
47394 let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
47395 assert_eq_m256i(r, e);
47396 }
47397
47398 #[simd_test(enable = "avx512f,avx512vl")]
47399 unsafe fn test_mm256_mask_cvtps_ph() {
47400 let a = _mm256_set1_ps(1.);
47401 let src = _mm_set1_epi16(0);
47402 let r = _mm256_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
47403 assert_eq_m128i(r, src);
47404 let r = _mm256_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0b11111111, a);
47405 let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
47406 assert_eq_m128i(r, e);
47407 }
47408
47409 #[simd_test(enable = "avx512f,avx512vl")]
47410 unsafe fn test_mm256_maskz_cvtps_ph() {
47411 let a = _mm256_set1_ps(1.);
47412 let r = _mm256_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0, a);
47413 assert_eq_m128i(r, _mm_setzero_si128());
47414 let r = _mm256_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0b11111111, a);
47415 let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
47416 assert_eq_m128i(r, e);
47417 }
47418
47419 #[simd_test(enable = "avx512f,avx512vl")]
47420 unsafe fn test_mm_mask_cvtps_ph() {
47421 let a = _mm_set1_ps(1.);
47422 let src = _mm_set1_epi16(0);
47423 let r = _mm_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
47424 assert_eq_m128i(r, src);
47425 let r = _mm_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0b00001111, a);
47426 let e = _mm_setr_epi64x(4323521613979991040, 0);
47427 assert_eq_m128i(r, e);
47428 }
47429
47430 #[simd_test(enable = "avx512f,avx512vl")]
47431 unsafe fn test_mm_maskz_cvtps_ph() {
47432 let a = _mm_set1_ps(1.);
47433 let r = _mm_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0, a);
47434 assert_eq_m128i(r, _mm_setzero_si128());
47435 let r = _mm_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0b00001111, a);
47436 let e = _mm_setr_epi64x(4323521613979991040, 0);
47437 assert_eq_m128i(r, e);
47438 }
47439
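    // The f16 -> f32 tests run the conversion in reverse: each 0x3C00 half in the
    // 0x3C00_3C00_3C00_3C00 lanes converts back to 1.0f32.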
47440 #[simd_test(enable = "avx512f")]
47441 unsafe fn test_mm512_cvt_roundph_ps() {
47442 let a = _mm256_setr_epi64x(
47443 4323521613979991040,
47444 4323521613979991040,
47445 4323521613979991040,
47446 4323521613979991040,
47447 );
47448 let r = _mm512_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(a);
47449 let e = _mm512_set1_ps(1.);
47450 assert_eq_m512(r, e);
47451 }
47452
47453 #[simd_test(enable = "avx512f")]
47454 unsafe fn test_mm512_mask_cvt_roundph_ps() {
47455 let a = _mm256_setr_epi64x(
47456 4323521613979991040,
47457 4323521613979991040,
47458 4323521613979991040,
47459 4323521613979991040,
47460 );
47461 let src = _mm512_set1_ps(0.);
47462 let r = _mm512_mask_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(src, 0, a);
47463 assert_eq_m512(r, src);
47464 let r = _mm512_mask_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
47465 let e = _mm512_setr_ps(
47466 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
47467 );
47468 assert_eq_m512(r, e);
47469 }
47470
47471 #[simd_test(enable = "avx512f")]
47472 unsafe fn test_mm512_maskz_cvt_roundph_ps() {
47473 let a = _mm256_setr_epi64x(
47474 4323521613979991040,
47475 4323521613979991040,
47476 4323521613979991040,
47477 4323521613979991040,
47478 );
47479 let r = _mm512_maskz_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(0, a);
47480 assert_eq_m512(r, _mm512_setzero_ps());
47481 let r = _mm512_maskz_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
47482 let e = _mm512_setr_ps(
47483 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
47484 );
47485 assert_eq_m512(r, e);
47486 }
47487
47488 #[simd_test(enable = "avx512f")]
47489 unsafe fn test_mm512_cvtph_ps() {
47490 let a = _mm256_setr_epi64x(
47491 4323521613979991040,
47492 4323521613979991040,
47493 4323521613979991040,
47494 4323521613979991040,
47495 );
47496 let r = _mm512_cvtph_ps(a);
47497 let e = _mm512_set1_ps(1.);
47498 assert_eq_m512(r, e);
47499 }
47500
47501 #[simd_test(enable = "avx512f")]
47502 unsafe fn test_mm512_mask_cvtph_ps() {
47503 let a = _mm256_setr_epi64x(
47504 4323521613979991040,
47505 4323521613979991040,
47506 4323521613979991040,
47507 4323521613979991040,
47508 );
47509 let src = _mm512_set1_ps(0.);
47510 let r = _mm512_mask_cvtph_ps(src, 0, a);
47511 assert_eq_m512(r, src);
47512 let r = _mm512_mask_cvtph_ps(src, 0b00000000_11111111, a);
47513 let e = _mm512_setr_ps(
47514 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
47515 );
47516 assert_eq_m512(r, e);
47517 }
47518
47519 #[simd_test(enable = "avx512f")]
47520 unsafe fn test_mm512_maskz_cvtph_ps() {
47521 let a = _mm256_setr_epi64x(
47522 4323521613979991040,
47523 4323521613979991040,
47524 4323521613979991040,
47525 4323521613979991040,
47526 );
47527 let r = _mm512_maskz_cvtph_ps(0, a);
47528 assert_eq_m512(r, _mm512_setzero_ps());
47529 let r = _mm512_maskz_cvtph_ps(0b00000000_11111111, a);
47530 let e = _mm512_setr_ps(
47531 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
47532 );
47533 assert_eq_m512(r, e);
47534 }
47535
47536 #[simd_test(enable = "avx512f,avx512vl")]
47537 unsafe fn test_mm256_mask_cvtph_ps() {
47538 let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
47539 let src = _mm256_set1_ps(0.);
47540 let r = _mm256_mask_cvtph_ps(src, 0, a);
47541 assert_eq_m256(r, src);
47542 let r = _mm256_mask_cvtph_ps(src, 0b11111111, a);
47543 let e = _mm256_setr_ps(1., 1., 1., 1., 1., 1., 1., 1.);
47544 assert_eq_m256(r, e);
47545 }
47546
47547 #[simd_test(enable = "avx512f,avx512vl")]
47548 unsafe fn test_mm256_maskz_cvtph_ps() {
47549 let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
47550 let r = _mm256_maskz_cvtph_ps(0, a);
47551 assert_eq_m256(r, _mm256_setzero_ps());
47552 let r = _mm256_maskz_cvtph_ps(0b11111111, a);
47553 let e = _mm256_setr_ps(1., 1., 1., 1., 1., 1., 1., 1.);
47554 assert_eq_m256(r, e);
47555 }
47556
47557 #[simd_test(enable = "avx512f,avx512vl")]
47558 unsafe fn test_mm_mask_cvtph_ps() {
47559 let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
47560 let src = _mm_set1_ps(0.);
47561 let r = _mm_mask_cvtph_ps(src, 0, a);
47562 assert_eq_m128(r, src);
47563 let r = _mm_mask_cvtph_ps(src, 0b00001111, a);
47564 let e = _mm_setr_ps(1., 1., 1., 1.);
47565 assert_eq_m128(r, e);
47566 }
47567
47568 #[simd_test(enable = "avx512f,avx512vl")]
47569 unsafe fn test_mm_maskz_cvtph_ps() {
47570 let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
47571 let r = _mm_maskz_cvtph_ps(0, a);
47572 assert_eq_m128(r, _mm_setzero_ps());
47573 let r = _mm_maskz_cvtph_ps(0b00001111, a);
47574 let e = _mm_setr_ps(1., 1., 1., 1.);
47575 assert_eq_m128(r, e);
47576 }
47577
47578 #[simd_test(enable = "avx512f")]
47579 unsafe fn test_mm512_cvtt_roundps_epi32() {
47580 let a = _mm512_setr_ps(
47581 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47582 );
47583 let r = _mm512_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(a);
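// Truncation rounds toward zero: -1.5 -> -1, 9.5 -> 9, 15.5 -> 15.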
47584 let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 8, 9, 10, 11, 12, 13, 14, 15);
47585 assert_eq_m512i(r, e);
47586 }
47587
47588 #[simd_test(enable = "avx512f")]
47589 unsafe fn test_mm512_mask_cvtt_roundps_epi32() {
47590 let a = _mm512_setr_ps(
47591 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47592 );
47593 let src = _mm512_set1_epi32(0);
47594 let r = _mm512_mask_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(src, 0, a);
47595 assert_eq_m512i(r, src);
47596 let r = _mm512_mask_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
47597 let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
47598 assert_eq_m512i(r, e);
47599 }
47600
47601 #[simd_test(enable = "avx512f")]
47602 unsafe fn test_mm512_maskz_cvtt_roundps_epi32() {
47603 let a = _mm512_setr_ps(
47604 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47605 );
47606 let r = _mm512_maskz_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(0, a);
47607 assert_eq_m512i(r, _mm512_setzero_si512());
47608 let r = _mm512_maskz_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
47609 let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
47610 assert_eq_m512i(r, e);
47611 }
47612
47613 #[simd_test(enable = "avx512f")]
47614 unsafe fn test_mm512_cvtt_roundps_epu32() {
47615 let a = _mm512_setr_ps(
47616 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47617 );
47618 let r = _mm512_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(a);
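// Negative inputs are out of range for an unsigned conversion, so those lanes come back as all-ones (0xFFFFFFFF), which appears as -1 in the signed expected vector.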
47619 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 9, 10, 11, 12, 13, 14, 15);
47620 assert_eq_m512i(r, e);
47621 }
47622
47623 #[simd_test(enable = "avx512f")]
47624 unsafe fn test_mm512_mask_cvtt_roundps_epu32() {
47625 let a = _mm512_setr_ps(
47626 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47627 );
47628 let src = _mm512_set1_epi32(0);
47629 let r = _mm512_mask_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(src, 0, a);
47630 assert_eq_m512i(r, src);
47631 let r = _mm512_mask_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
47632 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
47633 assert_eq_m512i(r, e);
47634 }
47635
47636 #[simd_test(enable = "avx512f")]
47637 unsafe fn test_mm512_maskz_cvtt_roundps_epu32() {
47638 let a = _mm512_setr_ps(
47639 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47640 );
47641 let r = _mm512_maskz_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(0, a);
47642 assert_eq_m512i(r, _mm512_setzero_si512());
47643 let r = _mm512_maskz_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
47644 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
47645 assert_eq_m512i(r, e);
47646 }
47647
47648 #[simd_test(enable = "avx512f")]
47649 unsafe fn test_mm512_cvttps_epi32() {
47650 let a = _mm512_setr_ps(
47651 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47652 );
47653 let r = _mm512_cvttps_epi32(a);
47654 let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 8, 9, 10, 11, 12, 13, 14, 15);
47655 assert_eq_m512i(r, e);
47656 }
47657
47658 #[simd_test(enable = "avx512f")]
47659 unsafe fn test_mm512_mask_cvttps_epi32() {
47660 let a = _mm512_setr_ps(
47661 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47662 );
47663 let src = _mm512_set1_epi32(0);
47664 let r = _mm512_mask_cvttps_epi32(src, 0, a);
47665 assert_eq_m512i(r, src);
47666 let r = _mm512_mask_cvttps_epi32(src, 0b00000000_11111111, a);
47667 let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
47668 assert_eq_m512i(r, e);
47669 }
47670
47671 #[simd_test(enable = "avx512f")]
47672 unsafe fn test_mm512_maskz_cvttps_epi32() {
47673 let a = _mm512_setr_ps(
47674 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47675 );
47676 let r = _mm512_maskz_cvttps_epi32(0, a);
47677 assert_eq_m512i(r, _mm512_setzero_si512());
47678 let r = _mm512_maskz_cvttps_epi32(0b00000000_11111111, a);
47679 let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
47680 assert_eq_m512i(r, e);
47681 }
47682
47683 #[simd_test(enable = "avx512f,avx512vl")]
47684 unsafe fn test_mm256_mask_cvttps_epi32() {
47685 let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
47686 let src = _mm256_set1_epi32(0);
47687 let r = _mm256_mask_cvttps_epi32(src, 0, a);
47688 assert_eq_m256i(r, src);
47689 let r = _mm256_mask_cvttps_epi32(src, 0b11111111, a);
47690 let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
47691 assert_eq_m256i(r, e);
47692 }
47693
47694 #[simd_test(enable = "avx512f,avx512vl")]
47695 unsafe fn test_mm256_maskz_cvttps_epi32() {
47696 let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
47697 let r = _mm256_maskz_cvttps_epi32(0, a);
47698 assert_eq_m256i(r, _mm256_setzero_si256());
47699 let r = _mm256_maskz_cvttps_epi32(0b11111111, a);
47700 let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
47701 assert_eq_m256i(r, e);
47702 }
47703
47704 #[simd_test(enable = "avx512f,avx512vl")]
47705 unsafe fn test_mm_mask_cvttps_epi32() {
47706 let a = _mm_set_ps(12., 13.5, 14., 15.5);
47707 let src = _mm_set1_epi32(0);
47708 let r = _mm_mask_cvttps_epi32(src, 0, a);
47709 assert_eq_m128i(r, src);
47710 let r = _mm_mask_cvttps_epi32(src, 0b00001111, a);
47711 let e = _mm_set_epi32(12, 13, 14, 15);
47712 assert_eq_m128i(r, e);
47713 }
47714
47715 #[simd_test(enable = "avx512f,avx512vl")]
47716 unsafe fn test_mm_maskz_cvttps_epi32() {
47717 let a = _mm_set_ps(12., 13.5, 14., 15.5);
47718 let r = _mm_maskz_cvttps_epi32(0, a);
47719 assert_eq_m128i(r, _mm_setzero_si128());
47720 let r = _mm_maskz_cvttps_epi32(0b00001111, a);
47721 let e = _mm_set_epi32(12, 13, 14, 15);
47722 assert_eq_m128i(r, e);
47723 }
47724
47725 #[simd_test(enable = "avx512f")]
47726 unsafe fn test_mm512_cvttps_epu32() {
47727 let a = _mm512_setr_ps(
47728 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47729 );
47730 let r = _mm512_cvttps_epu32(a);
47731 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 9, 10, 11, 12, 13, 14, 15);
47732 assert_eq_m512i(r, e);
47733 }
47734
47735 #[simd_test(enable = "avx512f")]
47736 unsafe fn test_mm512_mask_cvttps_epu32() {
47737 let a = _mm512_setr_ps(
47738 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47739 );
47740 let src = _mm512_set1_epi32(0);
47741 let r = _mm512_mask_cvttps_epu32(src, 0, a);
47742 assert_eq_m512i(r, src);
47743 let r = _mm512_mask_cvttps_epu32(src, 0b00000000_11111111, a);
47744 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
47745 assert_eq_m512i(r, e);
47746 }
47747
47748 #[simd_test(enable = "avx512f")]
47749 unsafe fn test_mm512_maskz_cvttps_epu32() {
47750 let a = _mm512_setr_ps(
47751 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47752 );
47753 let r = _mm512_maskz_cvttps_epu32(0, a);
47754 assert_eq_m512i(r, _mm512_setzero_si512());
47755 let r = _mm512_maskz_cvttps_epu32(0b00000000_11111111, a);
47756 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
47757 assert_eq_m512i(r, e);
47758 }
47759
47760 #[simd_test(enable = "avx512f,avx512vl")]
47761 unsafe fn test_mm256_cvttps_epu32() {
47762 let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
47763 let r = _mm256_cvttps_epu32(a);
47764 let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
47765 assert_eq_m256i(r, e);
47766 }
47767
47768 #[simd_test(enable = "avx512f,avx512vl")]
47769 unsafe fn test_mm256_mask_cvttps_epu32() {
47770 let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
47771 let src = _mm256_set1_epi32(0);
47772 let r = _mm256_mask_cvttps_epu32(src, 0, a);
47773 assert_eq_m256i(r, src);
47774 let r = _mm256_mask_cvttps_epu32(src, 0b11111111, a);
47775 let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
47776 assert_eq_m256i(r, e);
47777 }
47778
47779 #[simd_test(enable = "avx512f,avx512vl")]
47780 unsafe fn test_mm256_maskz_cvttps_epu32() {
47781 let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
47782 let r = _mm256_maskz_cvttps_epu32(0, a);
47783 assert_eq_m256i(r, _mm256_setzero_si256());
47784 let r = _mm256_maskz_cvttps_epu32(0b11111111, a);
47785 let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
47786 assert_eq_m256i(r, e);
47787 }
47788
47789 #[simd_test(enable = "avx512f,avx512vl")]
47790 unsafe fn test_mm_cvttps_epu32() {
47791 let a = _mm_set_ps(12., 13.5, 14., 15.5);
47792 let r = _mm_cvttps_epu32(a);
47793 let e = _mm_set_epi32(12, 13, 14, 15);
47794 assert_eq_m128i(r, e);
47795 }
47796
47797 #[simd_test(enable = "avx512f,avx512vl")]
47798 unsafe fn test_mm_mask_cvttps_epu32() {
47799 let a = _mm_set_ps(12., 13.5, 14., 15.5);
47800 let src = _mm_set1_epi32(0);
47801 let r = _mm_mask_cvttps_epu32(src, 0, a);
47802 assert_eq_m128i(r, src);
47803 let r = _mm_mask_cvttps_epu32(src, 0b00001111, a);
47804 let e = _mm_set_epi32(12, 13, 14, 15);
47805 assert_eq_m128i(r, e);
47806 }
47807
47808 #[simd_test(enable = "avx512f,avx512vl")]
47809 unsafe fn test_mm_maskz_cvttps_epu32() {
47810 let a = _mm_set_ps(12., 13.5, 14., 15.5);
47811 let r = _mm_maskz_cvttps_epu32(0, a);
47812 assert_eq_m128i(r, _mm_setzero_si128());
47813 let r = _mm_maskz_cvttps_epu32(0b00001111, a);
47814 let e = _mm_set_epi32(12, 13, 14, 15);
47815 assert_eq_m128i(r, e);
47816 }
47817
47818 #[simd_test(enable = "avx512f")]
47819 unsafe fn test_mm512_i32gather_ps() {
47820 let arr: [f32; 256] = core::array::from_fn(|i| i as f32);
47821 // A scale of 4 means each 32-bit index addresses a 4-byte element rather than a byte
47822 #[rustfmt::skip]
47823 let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
47824 120, 128, 136, 144, 152, 160, 168, 176);
47825 let r = _mm512_i32gather_ps::<4>(index, arr.as_ptr() as *const u8);
47826 #[rustfmt::skip]
47827 assert_eq_m512(r, _mm512_setr_ps(0., 16., 32., 48., 64., 80., 96., 112.,
47828 120., 128., 136., 144., 152., 160., 168., 176.));
47829 }
47830
47831 #[simd_test(enable = "avx512f")]
47832 unsafe fn test_mm512_mask_i32gather_ps() {
47833 let arr: [f32; 256] = core::array::from_fn(|i| i as f32);
47834 let src = _mm512_set1_ps(2.);
47835 let mask = 0b10101010_10101010;
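// Only the odd-numbered lanes are selected by the mask; the even lanes keep the value 2. from `src`.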
47836 #[rustfmt::skip]
47837 let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
47838 120, 128, 136, 144, 152, 160, 168, 176);
47839 // A scale of 4 means each 32-bit index addresses a 4-byte element rather than a byte
47840 let r = _mm512_mask_i32gather_ps::<4>(src, mask, index, arr.as_ptr() as *const u8);
47841 #[rustfmt::skip]
47842 assert_eq_m512(r, _mm512_setr_ps(2., 16., 2., 48., 2., 80., 2., 112.,
47843 2., 128., 2., 144., 2., 160., 2., 176.));
47844 }
47845
47846 #[simd_test(enable = "avx512f")]
47847 unsafe fn test_mm512_i32gather_epi32() {
47848 let arr: [i32; 256] = core::array::from_fn(|i| i as i32);
47849 // A scale of 4 means each 32-bit index addresses a 4-byte element rather than a byte
47850 #[rustfmt::skip]
47851 let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
47852 120, 128, 136, 144, 152, 160, 168, 176);
47853 let r = _mm512_i32gather_epi32::<4>(index, arr.as_ptr() as *const u8);
47854 #[rustfmt::skip]
47855 assert_eq_m512i(r, _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
47856 120, 128, 136, 144, 152, 160, 168, 176));
47857 }
47858
47859 #[simd_test(enable = "avx512f")]
47860 unsafe fn test_mm512_mask_i32gather_epi32() {
47861 let arr: [i32; 256] = core::array::from_fn(|i| i as i32);
47862 let src = _mm512_set1_epi32(2);
47863 let mask = 0b10101010_10101010;
47864 let index = _mm512_setr_epi32(
47865 0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240,
47866 );
47867 // A scale of 4 means each 32-bit index addresses a 4-byte element rather than a byte
47868 let r = _mm512_mask_i32gather_epi32::<4>(src, mask, index, arr.as_ptr() as *const u8);
47869 assert_eq_m512i(
47870 r,
47871 _mm512_setr_epi32(2, 16, 2, 48, 2, 80, 2, 112, 2, 144, 2, 176, 2, 208, 2, 240),
47872 );
47873 }
47874
47875 #[simd_test(enable = "avx512f")]
47876 unsafe fn test_mm512_i32scatter_ps() {
47877 let mut arr = [0f32; 256];
47878 #[rustfmt::skip]
47879 let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
47880 128, 144, 160, 176, 192, 208, 224, 240);
47881 let src = _mm512_setr_ps(
47882 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
47883 );
47884 // A scale of 4 means each 32-bit index addresses a 4-byte element rather than a byte
47885 _mm512_i32scatter_ps::<4>(arr.as_mut_ptr() as *mut u8, index, src);
47886 let mut expected = [0f32; 256];
47887 for i in 0..16 {
47888 expected[i * 16] = (i + 1) as f32;
47889 }
47890 assert_eq!(&arr[..], &expected[..]);
47891 }
47892
47893 #[simd_test(enable = "avx512f")]
47894 unsafe fn test_mm512_mask_i32scatter_ps() {
47895 let mut arr = [0f32; 256];
47896 let mask = 0b10101010_10101010;
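// Only the odd-numbered lanes are scattered; the even lanes leave `arr` untouched.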
47897 #[rustfmt::skip]
47898 let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
47899 128, 144, 160, 176, 192, 208, 224, 240);
47900 let src = _mm512_setr_ps(
47901 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
47902 );
47903 // A scale of 4 means each 32-bit index addresses a 4-byte element rather than a byte
47904 _mm512_mask_i32scatter_ps::<4>(arr.as_mut_ptr() as *mut u8, mask, index, src);
47905 let mut expected = [0f32; 256];
47906 for i in 0..8 {
47907 expected[i * 32 + 16] = 2. * (i + 1) as f32;
47908 }
47909 assert_eq!(&arr[..], &expected[..]);
47910 }
47911
47912 #[simd_test(enable = "avx512f")]
47913 unsafe fn test_mm512_i32scatter_epi32() {
47914 let mut arr = [0i32; 256];
47915 #[rustfmt::skip]
47917 let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
47918 128, 144, 160, 176, 192, 208, 224, 240);
47919 let src = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
47920 // A scale of 4 means each 32-bit index addresses a 4-byte element rather than a byte
47921 _mm512_i32scatter_epi32::<4>(arr.as_mut_ptr() as *mut u8, index, src);
47922 let mut expected = [0i32; 256];
47923 for i in 0..16 {
47924 expected[i * 16] = (i + 1) as i32;
47925 }
47926 assert_eq!(&arr[..], &expected[..]);
47927 }
47928
47929 #[simd_test(enable = "avx512f")]
47930 unsafe fn test_mm512_mask_i32scatter_epi32() {
47931 let mut arr = [0i32; 256];
47932 let mask = 0b10101010_10101010;
47933 #[rustfmt::skip]
47934 let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
47935 128, 144, 160, 176, 192, 208, 224, 240);
47936 let src = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
47937 // A scale of 4 means each 32-bit index addresses a 4-byte element rather than a byte
47938 _mm512_mask_i32scatter_epi32::<4>(arr.as_mut_ptr() as *mut u8, mask, index, src);
47939 let mut expected = [0i32; 256];
47940 for i in 0..8 {
47941 expected[i * 32 + 16] = 2 * (i + 1) as i32;
47942 }
47943 assert_eq!(&arr[..], &expected[..]);
47944 }
47945
47946 #[simd_test(enable = "avx512f")]
47947 unsafe fn test_mm512_cmplt_ps_mask() {
47948 #[rustfmt::skip]
47949 let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
47950 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
47951 let b = _mm512_set1_ps(-1.);
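// LT is an ordered compare, so the NaN lanes stay 0; only the f32::MIN and -100. lanes are below -1.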
47952 let m = _mm512_cmplt_ps_mask(a, b);
47953 assert_eq!(m, 0b00000101_00000101);
47954 }
47955
47956 #[simd_test(enable = "avx512f")]
47957 unsafe fn test_mm512_mask_cmplt_ps_mask() {
47958 #[rustfmt::skip]
47959 let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
47960 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
47961 let b = _mm512_set1_ps(-1.);
47962 let mask = 0b01100110_01100110;
47963 let r = _mm512_mask_cmplt_ps_mask(mask, a, b);
47964 assert_eq!(r, 0b00000100_00000100);
47965 }
47966
47967 #[simd_test(enable = "avx512f")]
47968 unsafe fn test_mm512_cmpnlt_ps_mask() {
47969 #[rustfmt::skip]
47970 let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
47971 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
47972 let b = _mm512_set1_ps(-1.);
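// NLT is an unordered compare: NaN lanes yield 1, making NLT the exact bitwise complement of LT.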
47973 assert_eq!(_mm512_cmpnlt_ps_mask(a, b), !_mm512_cmplt_ps_mask(a, b));
47974 }
47975
47976 #[simd_test(enable = "avx512f")]
47977 unsafe fn test_mm512_mask_cmpnlt_ps_mask() {
47978 #[rustfmt::skip]
47979 let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
47980 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
47981 let b = _mm512_set1_ps(-1.);
47982 let mask = 0b01111010_01111010;
47983 assert_eq!(_mm512_mask_cmpnlt_ps_mask(mask, a, b), 0b01111010_01111010);
47984 }
47985
47986 #[simd_test(enable = "avx512f")]
47987 unsafe fn test_mm512_cmpnle_ps_mask() {
47988 #[rustfmt::skip]
47989 let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
47990 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
47991 let b = _mm512_set1_ps(-1.);
47992 let m = _mm512_cmpnle_ps_mask(b, a);
47993 assert_eq!(m, 0b00001101_00001101);
47994 }
47995
47996 #[simd_test(enable = "avx512f")]
47997 unsafe fn test_mm512_mask_cmpnle_ps_mask() {
47998 #[rustfmt::skip]
47999 let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
48000 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
48001 let b = _mm512_set1_ps(-1.);
48002 let mask = 0b01100110_01100110;
48003 let r = _mm512_mask_cmpnle_ps_mask(mask, b, a);
48004 assert_eq!(r, 0b00000100_00000100);
48005 }
48006
48007 #[simd_test(enable = "avx512f")]
48008 unsafe fn test_mm512_cmple_ps_mask() {
48009 #[rustfmt::skip]
48010 let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
48011 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
48012 let b = _mm512_set1_ps(-1.);
48013 assert_eq!(_mm512_cmple_ps_mask(a, b), 0b00100101_00100101);
48014 }
48015
48016 #[simd_test(enable = "avx512f")]
48017 unsafe fn test_mm512_mask_cmple_ps_mask() {
48018 #[rustfmt::skip]
48019 let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
48020 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
48021 let b = _mm512_set1_ps(-1.);
48022 let mask = 0b01111010_01111010;
48023 assert_eq!(_mm512_mask_cmple_ps_mask(mask, a, b), 0b00100000_00100000);
48024 }
48025
48026 #[simd_test(enable = "avx512f")]
48027 unsafe fn test_mm512_cmpeq_ps_mask() {
48028 #[rustfmt::skip]
48029 let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
48030 0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
48031 #[rustfmt::skip]
48032 let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
48033 0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
48034 let m = _mm512_cmpeq_ps_mask(b, a);
48035 assert_eq!(m, 0b11001101_11001101);
48036 }
48037
48038 #[simd_test(enable = "avx512f")]
48039 unsafe fn test_mm512_mask_cmpeq_ps_mask() {
48040 #[rustfmt::skip]
48041 let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
48042 0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
48043 #[rustfmt::skip]
48044 let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
48045 0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
48046 let mask = 0b01111010_01111010;
48047 let r = _mm512_mask_cmpeq_ps_mask(mask, b, a);
48048 assert_eq!(r, 0b01001000_01001000);
48049 }
48050
48051 #[simd_test(enable = "avx512f")]
48052 unsafe fn test_mm512_cmpneq_ps_mask() {
48053 #[rustfmt::skip]
48054 let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
48055 0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
48056 #[rustfmt::skip]
48057 let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
48058 0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
48059 let m = _mm512_cmpneq_ps_mask(b, a);
48060 assert_eq!(m, 0b00110010_00110010);
48061 }
48062
48063 #[simd_test(enable = "avx512f")]
48064 unsafe fn test_mm512_mask_cmpneq_ps_mask() {
48065 #[rustfmt::skip]
48066 let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
48067 0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
48068 #[rustfmt::skip]
48069 let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
48070 0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
48071 let mask = 0b01111010_01111010;
48072 let r = _mm512_mask_cmpneq_ps_mask(mask, b, a);
48073 assert_eq!(r, 0b00110010_00110010)
48074 }
48075
48076 #[simd_test(enable = "avx512f")]
48077 unsafe fn test_mm512_cmp_ps_mask() {
48078 #[rustfmt::skip]
48079 let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
48080 0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
48081 let b = _mm512_set1_ps(-1.);
48082 let m = _mm512_cmp_ps_mask::<_CMP_LT_OQ>(a, b);
48083 assert_eq!(m, 0b00000101_00000101);
48084 }
48085
48086 #[simd_test(enable = "avx512f")]
48087 unsafe fn test_mm512_mask_cmp_ps_mask() {
48088 #[rustfmt::skip]
48089 let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
48090 0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
48091 let b = _mm512_set1_ps(-1.);
48092 let mask = 0b01100110_01100110;
48093 let r = _mm512_mask_cmp_ps_mask::<_CMP_LT_OQ>(mask, a, b);
48094 assert_eq!(r, 0b00000100_00000100);
48095 }
48096
48097 #[simd_test(enable = "avx512f,avx512vl")]
48098 unsafe fn test_mm256_cmp_ps_mask() {
48099 let a = _mm256_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
48100 let b = _mm256_set1_ps(-1.);
48101 let m = _mm256_cmp_ps_mask::<_CMP_LT_OQ>(a, b);
48102 assert_eq!(m, 0b00000101);
48103 }
48104
48105 #[simd_test(enable = "avx512f,avx512vl")]
48106 unsafe fn test_mm256_mask_cmp_ps_mask() {
48107 let a = _mm256_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
48108 let b = _mm256_set1_ps(-1.);
48109 let mask = 0b01100110;
48110 let r = _mm256_mask_cmp_ps_mask::<_CMP_LT_OQ>(mask, a, b);
48111 assert_eq!(r, 0b00000100);
48112 }
48113
48114 #[simd_test(enable = "avx512f,avx512vl")]
48115 unsafe fn test_mm_cmp_ps_mask() {
48116 let a = _mm_set_ps(0., 1., -1., 13.);
48117 let b = _mm_set1_ps(1.);
48118 let m = _mm_cmp_ps_mask::<_CMP_LT_OQ>(a, b);
48119 assert_eq!(m, 0b00001010);
48120 }
48121
48122 #[simd_test(enable = "avx512f,avx512vl")]
48123 unsafe fn test_mm_mask_cmp_ps_mask() {
48124 let a = _mm_set_ps(0., 1., -1., 13.);
48125 let b = _mm_set1_ps(1.);
48126 let mask = 0b11111111;
48127 let r = _mm_mask_cmp_ps_mask::<_CMP_LT_OQ>(mask, a, b);
48128 assert_eq!(r, 0b00001010);
48129 }
48130
48131 #[simd_test(enable = "avx512f")]
48132 unsafe fn test_mm512_cmp_round_ps_mask() {
48133 #[rustfmt::skip]
48134 let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
48135 0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
48136 let b = _mm512_set1_ps(-1.);
48137 let m = _mm512_cmp_round_ps_mask::<_CMP_LT_OQ, _MM_FROUND_CUR_DIRECTION>(a, b);
48138 assert_eq!(m, 0b00000101_00000101);
48139 }
48140
48141 #[simd_test(enable = "avx512f")]
48142 unsafe fn test_mm512_mask_cmp_round_ps_mask() {
48143 #[rustfmt::skip]
48144 let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
48145 0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
48146 let b = _mm512_set1_ps(-1.);
48147 let mask = 0b01100110_01100110;
48148 let r = _mm512_mask_cmp_round_ps_mask::<_CMP_LT_OQ, _MM_FROUND_CUR_DIRECTION>(mask, a, b);
48149 assert_eq!(r, 0b00000100_00000100);
48150 }
48151
48152 #[simd_test(enable = "avx512f")]
48153 unsafe fn test_mm512_cmpord_ps_mask() {
48154 #[rustfmt::skip]
48155 let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
48156 f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
48157 #[rustfmt::skip]
48158 let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
48159 f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
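// A lane is ordered only when neither input is NaN.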
48160 let m = _mm512_cmpord_ps_mask(a, b);
48161 assert_eq!(m, 0b00000101_00000101);
48162 }
48163
48164 #[simd_test(enable = "avx512f")]
48165 unsafe fn test_mm512_mask_cmpord_ps_mask() {
48166 #[rustfmt::skip]
48167 let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
48168 f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
48169 #[rustfmt::skip]
48170 let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
48171 f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
48172 let mask = 0b11000011_11000011;
48173 let m = _mm512_mask_cmpord_ps_mask(mask, a, b);
48174 assert_eq!(m, 0b00000001_00000001);
48175 }
48176
48177 #[simd_test(enable = "avx512f")]
48178 unsafe fn test_mm512_cmpunord_ps_mask() {
48179 #[rustfmt::skip]
48180 let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
48181 f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
48182 #[rustfmt::skip]
48183 let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
48184 f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
48185 let m = _mm512_cmpunord_ps_mask(a, b);
48187 assert_eq!(m, 0b11111010_11111010);
48188 }
48189
48190 #[simd_test(enable = "avx512f")]
48191 unsafe fn test_mm512_mask_cmpunord_ps_mask() {
48192 #[rustfmt::skip]
48193 let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
48194 f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
48195 #[rustfmt::skip]
48196 let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
48197 f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
48198 let mask = 0b00001111_00001111;
48199 let m = _mm512_mask_cmpunord_ps_mask(mask, a, b);
48200 assert_eq!(m, 0b00001010_00001010);
48201 }
48202
48203 #[simd_test(enable = "avx512f")]
48204 unsafe fn test_mm_cmp_ss_mask() {
48205 let a = _mm_setr_ps(2., 1., 1., 1.);
48206 let b = _mm_setr_ps(1., 2., 2., 2.);
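// Scalar (ss) compares look only at element 0; 2. >= 1. sets the single mask bit.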
48207 let m = _mm_cmp_ss_mask::<_CMP_GE_OS>(a, b);
48208 assert_eq!(m, 1);
48209 }
48210
48211 #[simd_test(enable = "avx512f")]
48212 unsafe fn test_mm_mask_cmp_ss_mask() {
48213 let a = _mm_setr_ps(2., 1., 1., 1.);
48214 let b = _mm_setr_ps(1., 2., 2., 2.);
48215 let m = _mm_mask_cmp_ss_mask::<_CMP_GE_OS>(0b10, a, b);
48216 assert_eq!(m, 0);
48217 let m = _mm_mask_cmp_ss_mask::<_CMP_GE_OS>(0b1, a, b);
48218 assert_eq!(m, 1);
48219 }
48220
48221 #[simd_test(enable = "avx512f")]
48222 unsafe fn test_mm_cmp_round_ss_mask() {
48223 let a = _mm_setr_ps(2., 1., 1., 1.);
48224 let b = _mm_setr_ps(1., 2., 2., 2.);
48225 let m = _mm_cmp_round_ss_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(a, b);
48226 assert_eq!(m, 1);
48227 }
48228
48229 #[simd_test(enable = "avx512f")]
48230 unsafe fn test_mm_mask_cmp_round_ss_mask() {
48231 let a = _mm_setr_ps(2., 1., 1., 1.);
48232 let b = _mm_setr_ps(1., 2., 2., 2.);
48233 let m = _mm_mask_cmp_round_ss_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(0b10, a, b);
48234 assert_eq!(m, 0);
48235 let m = _mm_mask_cmp_round_ss_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(0b1, a, b);
48236 assert_eq!(m, 1);
48237 }
48238
48239 #[simd_test(enable = "avx512f")]
48240 unsafe fn test_mm_cmp_sd_mask() {
48241 let a = _mm_setr_pd(2., 1.);
48242 let b = _mm_setr_pd(1., 2.);
48243 let m = _mm_cmp_sd_mask::<_CMP_GE_OS>(a, b);
48244 assert_eq!(m, 1);
48245 }
48246
48247 #[simd_test(enable = "avx512f")]
48248 unsafe fn test_mm_mask_cmp_sd_mask() {
48249 let a = _mm_setr_pd(2., 1.);
48250 let b = _mm_setr_pd(1., 2.);
48251 let m = _mm_mask_cmp_sd_mask::<_CMP_GE_OS>(0b10, a, b);
48252 assert_eq!(m, 0);
48253 let m = _mm_mask_cmp_sd_mask::<_CMP_GE_OS>(0b1, a, b);
48254 assert_eq!(m, 1);
48255 }
48256
48257 #[simd_test(enable = "avx512f")]
48258 unsafe fn test_mm_cmp_round_sd_mask() {
48259 let a = _mm_setr_pd(2., 1.);
48260 let b = _mm_setr_pd(1., 2.);
48261 let m = _mm_cmp_round_sd_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(a, b);
48262 assert_eq!(m, 1);
48263 }
48264
48265 #[simd_test(enable = "avx512f")]
48266 unsafe fn test_mm_mask_cmp_round_sd_mask() {
48267 let a = _mm_setr_pd(2., 1.);
48268 let b = _mm_setr_pd(1., 2.);
48269 let m = _mm_mask_cmp_round_sd_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(0b10, a, b);
48270 assert_eq!(m, 0);
48271 let m = _mm_mask_cmp_round_sd_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(0b1, a, b);
48272 assert_eq!(m, 1);
48273 }
48274
48275 #[simd_test(enable = "avx512f")]
48276 unsafe fn test_mm512_cmplt_epu32_mask() {
48277 #[rustfmt::skip]
48278 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
48279 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
48280 let b = _mm512_set1_epi32(-1);
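// Compared as unsigned values, -1 is u32::MAX, so every lane except those equal to 0xFFFFFFFF is below it.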
48281 let m = _mm512_cmplt_epu32_mask(a, b);
48282 assert_eq!(m, 0b11001111_11001111);
48283 }
48284
48285 #[simd_test(enable = "avx512f")]
48286 unsafe fn test_mm512_mask_cmplt_epu32_mask() {
48287 #[rustfmt::skip]
48288 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
48289 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
48290 let b = _mm512_set1_epi32(-1);
48291 let mask = 0b01111010_01111010;
48292 let r = _mm512_mask_cmplt_epu32_mask(mask, a, b);
48293 assert_eq!(r, 0b01001010_01001010);
48294 }
48295
48296 #[simd_test(enable = "avx512f,avx512vl")]
48297 unsafe fn test_mm256_cmplt_epu32_mask() {
48298 let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 101, 100, 99);
48299 let b = _mm256_set1_epi32(1);
48300 let r = _mm256_cmplt_epu32_mask(a, b);
48301 assert_eq!(r, 0b10000000);
48302 }
48303
48304 #[simd_test(enable = "avx512f,avx512vl")]
48305 unsafe fn test_mm256_mask_cmplt_epu32_mask() {
48306 let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 101, 100, 99);
48307 let b = _mm256_set1_epi32(1);
48308 let mask = 0b11111111;
48309 let r = _mm256_mask_cmplt_epu32_mask(mask, a, b);
48310 assert_eq!(r, 0b10000000);
48311 }
48312
48313 #[simd_test(enable = "avx512f,avx512vl")]
48314 unsafe fn test_mm_cmplt_epu32_mask() {
48315 let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
48316 let b = _mm_set1_epi32(1);
48317 let r = _mm_cmplt_epu32_mask(a, b);
48318 assert_eq!(r, 0b00001000);
48319 }
48320
48321 #[simd_test(enable = "avx512f,avx512vl")]
48322 unsafe fn test_mm_mask_cmplt_epu32_mask() {
48323 let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
48324 let b = _mm_set1_epi32(1);
48325 let mask = 0b11111111;
48326 let r = _mm_mask_cmplt_epu32_mask(mask, a, b);
48327 assert_eq!(r, 0b00001000);
48328 }
48329
48330 #[simd_test(enable = "avx512f")]
48331 unsafe fn test_mm512_cmpgt_epu32_mask() {
48332 #[rustfmt::skip]
48333 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
48334 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
48335 let b = _mm512_set1_epi32(-1);
48336 let m = _mm512_cmpgt_epu32_mask(b, a);
48337 assert_eq!(m, 0b11001111_11001111);
48338 }
48339
48340 #[simd_test(enable = "avx512f")]
48341 unsafe fn test_mm512_mask_cmpgt_epu32_mask() {
48342 #[rustfmt::skip]
48343 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
48344 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
48345 let b = _mm512_set1_epi32(-1);
48346 let mask = 0b01111010_01111010;
48347 let r = _mm512_mask_cmpgt_epu32_mask(mask, b, a);
48348 assert_eq!(r, 0b01001010_01001010);
48349 }
48350
48351 #[simd_test(enable = "avx512f,avx512vl")]
48352 unsafe fn test_mm256_cmpgt_epu32_mask() {
48353 let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 99, 100, 101);
48354 let b = _mm256_set1_epi32(1);
48355 let r = _mm256_cmpgt_epu32_mask(a, b);
48356 assert_eq!(r, 0b00111111);
48357 }
48358
48359 #[simd_test(enable = "avx512f,avx512vl")]
48360 unsafe fn test_mm256_mask_cmpgt_epu32_mask() {
48361 let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 99, 100, 101);
48362 let b = _mm256_set1_epi32(1);
48363 let mask = 0b11111111;
48364 let r = _mm256_mask_cmpgt_epu32_mask(mask, a, b);
48365 assert_eq!(r, 0b00111111);
48366 }
48367
48368 #[simd_test(enable = "avx512f,avx512vl")]
48369 unsafe fn test_mm_cmpgt_epu32_mask() {
48370 let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
48371 let b = _mm_set1_epi32(1);
48372 let r = _mm_cmpgt_epu32_mask(a, b);
48373 assert_eq!(r, 0b00000011);
48374 }
48375
48376 #[simd_test(enable = "avx512f,avx512vl")]
48377 unsafe fn test_mm_mask_cmpgt_epu32_mask() {
48378 let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
48379 let b = _mm_set1_epi32(1);
48380 let mask = 0b11111111;
48381 let r = _mm_mask_cmpgt_epu32_mask(mask, a, b);
48382 assert_eq!(r, 0b00000011);
48383 }
48384
48385 #[simd_test(enable = "avx512f")]
48386 unsafe fn test_mm512_cmple_epu32_mask() {
48387 #[rustfmt::skip]
48388 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
48389 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
48390 let b = _mm512_set1_epi32(-1);
48391 assert_eq!(
48392 _mm512_cmple_epu32_mask(a, b),
48393 !_mm512_cmpgt_epu32_mask(a, b)
48394 )
48395 }
48396
48397 #[simd_test(enable = "avx512f")]
48398 unsafe fn test_mm512_mask_cmple_epu32_mask() {
48399 #[rustfmt::skip]
48400 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
48401 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
48402 let b = _mm512_set1_epi32(-1);
48403 let mask = 0b01111010_01111010;
48404 assert_eq!(
48405 _mm512_mask_cmple_epu32_mask(mask, a, b),
48406 0b01111010_01111010
48407 );
48408 }
48409
48410 #[simd_test(enable = "avx512f,avx512vl")]
48411 unsafe fn test_mm256_cmple_epu32_mask() {
48412 let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 200, 100, 101);
48413 let b = _mm256_set1_epi32(1);
48414 let r = _mm256_cmple_epu32_mask(a, b);
48415 assert_eq!(r, 0b11000000)
48416 }
48417
48418 #[simd_test(enable = "avx512f,avx512vl")]
48419 unsafe fn test_mm256_mask_cmple_epu32_mask() {
48420 let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 200, 100, 101);
48421 let b = _mm256_set1_epi32(1);
48422 let mask = 0b11111111;
48423 let r = _mm256_mask_cmple_epu32_mask(mask, a, b);
48424 assert_eq!(r, 0b11000000)
48425 }
48426
48427 #[simd_test(enable = "avx512f,avx512vl")]
48428 unsafe fn test_mm_cmple_epu32_mask() {
48429 let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
48430 let b = _mm_set1_epi32(1);
48431 let r = _mm_cmple_epu32_mask(a, b);
48432 assert_eq!(r, 0b00001100)
48433 }
48434
48435 #[simd_test(enable = "avx512f,avx512vl")]
48436 unsafe fn test_mm_mask_cmple_epu32_mask() {
48437 let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
48438 let b = _mm_set1_epi32(1);
48439 let mask = 0b11111111;
48440 let r = _mm_mask_cmple_epu32_mask(mask, a, b);
48441 assert_eq!(r, 0b00001100)
48442 }
48443
48444 #[simd_test(enable = "avx512f")]
48445 unsafe fn test_mm512_cmpge_epu32_mask() {
48446 #[rustfmt::skip]
48447 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
48448 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
48449 let b = _mm512_set1_epi32(-1);
48450 assert_eq!(
48451 _mm512_cmpge_epu32_mask(a, b),
48452 !_mm512_cmplt_epu32_mask(a, b)
48453 )
48454 }
48455
48456 #[simd_test(enable = "avx512f")]
48457 unsafe fn test_mm512_mask_cmpge_epu32_mask() {
48458 #[rustfmt::skip]
48459 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
48460 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
48461 let b = _mm512_set1_epi32(-1);
48462 let mask = 0b01111010_01111010;
48463 assert_eq!(_mm512_mask_cmpge_epu32_mask(mask, a, b), 0b00110000_00110000);
48464 }
48465
48466 #[simd_test(enable = "avx512f,avx512vl")]
48467 unsafe fn test_mm256_cmpge_epu32_mask() {
48468 let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 300, 100, 200);
48469 let b = _mm256_set1_epi32(1);
48470 let r = _mm256_cmpge_epu32_mask(a, b);
48471 assert_eq!(r, 0b01111111)
48472 }
48473
48474 #[simd_test(enable = "avx512f,avx512vl")]
48475 unsafe fn test_mm256_mask_cmpge_epu32_mask() {
48476 let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 300, 100, 200);
48477 let b = _mm256_set1_epi32(1);
48478 let mask = 0b11111111;
48479 let r = _mm256_mask_cmpge_epu32_mask(mask, a, b);
48480 assert_eq!(r, 0b01111111)
48481 }
48482
48483 #[simd_test(enable = "avx512f,avx512vl")]
48484 unsafe fn test_mm_cmpge_epu32_mask() {
48485 let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
48486 let b = _mm_set1_epi32(1);
48487 let r = _mm_cmpge_epu32_mask(a, b);
48488 assert_eq!(r, 0b00000111)
48489 }
48490
48491 #[simd_test(enable = "avx512f,avx512vl")]
48492 unsafe fn test_mm_mask_cmpge_epu32_mask() {
48493 let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
48494 let b = _mm_set1_epi32(1);
48495 let mask = 0b11111111;
48496 let r = _mm_mask_cmpge_epu32_mask(mask, a, b);
48497 assert_eq!(r, 0b00000111)
48498 }
48499
48500 #[simd_test(enable = "avx512f")]
48501 unsafe fn test_mm512_cmpeq_epu32_mask() {
48502 #[rustfmt::skip]
48503 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
48504 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
48505 #[rustfmt::skip]
48506 let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
48507 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
48508 let m = _mm512_cmpeq_epu32_mask(b, a);
48509 assert_eq!(m, 0b11001111_11001111);
48510 }
48511
48512 #[simd_test(enable = "avx512f")]
48513 unsafe fn test_mm512_mask_cmpeq_epu32_mask() {
48514 #[rustfmt::skip]
48515 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
48516 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
48517 #[rustfmt::skip]
48518 let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
48519 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
48520 let mask = 0b01111010_01111010;
48521 let r = _mm512_mask_cmpeq_epu32_mask(mask, b, a);
48522 assert_eq!(r, 0b01001010_01001010);
48523 }
48524
48525 #[simd_test(enable = "avx512f,avx512vl")]
48526 unsafe fn test_mm256_cmpeq_epu32_mask() {
48527 let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
48528 let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
48529 let m = _mm256_cmpeq_epu32_mask(b, a);
48530 assert_eq!(m, 0b11001111);
48531 }
48532
48533 #[simd_test(enable = "avx512f,avx512vl")]
48534 unsafe fn test_mm256_mask_cmpeq_epu32_mask() {
48535 let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
48536 let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
48537 let mask = 0b01111010;
48538 let r = _mm256_mask_cmpeq_epu32_mask(mask, b, a);
48539 assert_eq!(r, 0b01001010);
48540 }
48541
48542 #[simd_test(enable = "avx512f,avx512vl")]
48543 unsafe fn test_mm_cmpeq_epu32_mask() {
48544 let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
48545 let b = _mm_set_epi32(0, 1, 13, 42);
48546 let m = _mm_cmpeq_epu32_mask(b, a);
48547 assert_eq!(m, 0b00001100);
48548 }
48549
48550 #[simd_test(enable = "avx512f,avx512vl")]
48551 unsafe fn test_mm_mask_cmpeq_epu32_mask() {
48552 let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
48553 let b = _mm_set_epi32(0, 1, 13, 42);
48554 let mask = 0b11111111;
48555 let r = _mm_mask_cmpeq_epu32_mask(mask, b, a);
48556 assert_eq!(r, 0b00001100);
48557 }
48558
48559 #[simd_test(enable = "avx512f")]
48560 unsafe fn test_mm512_cmpneq_epu32_mask() {
48561 #[rustfmt::skip]
48562 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
48563 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
48564 #[rustfmt::skip]
48565 let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
48566 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
48567 let m = _mm512_cmpneq_epu32_mask(b, a);
48568 assert_eq!(m, !_mm512_cmpeq_epu32_mask(b, a));
48569 }
48570
48571 #[simd_test(enable = "avx512f")]
48572 unsafe fn test_mm512_mask_cmpneq_epu32_mask() {
48573 #[rustfmt::skip]
48574 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100,
48575 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100);
48576 #[rustfmt::skip]
48577 let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
48578 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
48579 let mask = 0b01111010_01111010;
48580 let r = _mm512_mask_cmpneq_epu32_mask(mask, b, a);
48581 assert_eq!(r, 0b00110010_00110010);
48582 }
48583
48584 #[simd_test(enable = "avx512f,avx512vl")]
48585 unsafe fn test_mm256_cmpneq_epu32_mask() {
48586 let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100);
48587 let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, -100, 100);
48588 let r = _mm256_cmpneq_epu32_mask(b, a);
48589 assert_eq!(r, 0b00110000);
48590 }
48591
48592 #[simd_test(enable = "avx512f,avx512vl")]
48593 unsafe fn test_mm256_mask_cmpneq_epu32_mask() {
48594 let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100);
48595 let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, -100, 100);
48596 let mask = 0b11111111;
48597 let r = _mm256_mask_cmpneq_epu32_mask(mask, b, a);
48598 assert_eq!(r, 0b00110000);
48599 }
48600
48601 #[simd_test(enable = "avx512f,avx512vl")]
48602 unsafe fn test_mm_cmpneq_epu32_mask() {
48603 let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
48604 let b = _mm_set_epi32(0, 1, 13, 42);
48605 let r = _mm_cmpneq_epu32_mask(b, a);
48606 assert_eq!(r, 0b00000011);
48607 }
48608
48609 #[simd_test(enable = "avx512f,avx512vl")]
48610 unsafe fn test_mm_mask_cmpneq_epu32_mask() {
48611 let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
48612 let b = _mm_set_epi32(0, 1, 13, 42);
48613 let mask = 0b11111111;
48614 let r = _mm_mask_cmpneq_epu32_mask(mask, b, a);
48615 assert_eq!(r, 0b00000011);
48616 }
48617
48618 #[simd_test(enable = "avx512f")]
48619 unsafe fn test_mm512_cmp_epu32_mask() {
48620 #[rustfmt::skip]
48621 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
48622 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
48623 let b = _mm512_set1_epi32(-1);
48624 let m = _mm512_cmp_epu32_mask::<_MM_CMPINT_LT>(a, b);
48625 assert_eq!(m, 0b11001111_11001111);
48626 }
48627
48628 #[simd_test(enable = "avx512f")]
48629 unsafe fn test_mm512_mask_cmp_epu32_mask() {
48630 #[rustfmt::skip]
48631 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
48632 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
48633 let b = _mm512_set1_epi32(-1);
48634 let mask = 0b01111010_01111010;
48635 let r = _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(mask, a, b);
48636 assert_eq!(r, 0b01001010_01001010);
48637 }
48638
48639 #[simd_test(enable = "avx512f,avx512vl")]
48640 unsafe fn test_mm256_cmp_epu32_mask() {
48641 let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
48642 let b = _mm256_set1_epi32(-1);
48643 let m = _mm256_cmp_epu32_mask::<_MM_CMPINT_LT>(a, b);
48644 assert_eq!(m, 0b11001111);
48645 }
48646
48647 #[simd_test(enable = "avx512f,avx512vl")]
48648 unsafe fn test_mm256_mask_cmp_epu32_mask() {
48649 let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
48650 let b = _mm256_set1_epi32(-1);
48651 let mask = 0b11111111;
48652 let r = _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(mask, a, b);
48653 assert_eq!(r, 0b11001111);
48654 }
48655
48656 #[simd_test(enable = "avx512f,avx512vl")]
48657 unsafe fn test_mm_cmp_epu32_mask() {
48658 let a = _mm_set_epi32(0, 1, -1, i32::MAX);
48659 let b = _mm_set1_epi32(1);
48660 let m = _mm_cmp_epu32_mask::<_MM_CMPINT_LT>(a, b);
48661 assert_eq!(m, 0b00001000);
48662 }
48663
48664 #[simd_test(enable = "avx512f,avx512vl")]
48665 unsafe fn test_mm_mask_cmp_epu32_mask() {
48666 let a = _mm_set_epi32(0, 1, -1, i32::MAX);
48667 let b = _mm_set1_epi32(1);
48668 let mask = 0b11111111;
48669 let r = _mm_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(mask, a, b);
48670 assert_eq!(r, 0b00001000);
48671 }
48672
48673 #[simd_test(enable = "avx512f")]
48674 unsafe fn test_mm512_cmplt_epi32_mask() {
48675 #[rustfmt::skip]
48676 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
48677 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
48678 let b = _mm512_set1_epi32(-1);
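// Signed compare: only the i32::MIN and -100 lanes are below -1 (the -1 and u32::MAX lanes equal b).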
48679 let m = _mm512_cmplt_epi32_mask(a, b);
48680 assert_eq!(m, 0b00000101_00000101);
48681 }
48682
48683 #[simd_test(enable = "avx512f")]
48684 unsafe fn test_mm512_mask_cmplt_epi32_mask() {
48685 #[rustfmt::skip]
48686 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
48687 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
48688 let b = _mm512_set1_epi32(-1);
48689 let mask = 0b01100110_01100110;
48690 let r = _mm512_mask_cmplt_epi32_mask(mask, a, b);
48691 assert_eq!(r, 0b00000100_00000100);
48692 }
48693
48694 #[simd_test(enable = "avx512f,avx512vl")]
48695 unsafe fn test_mm256_cmplt_epi32_mask() {
48696 let a = _mm256_set_epi32(0, 1, -1, 101, i32::MAX, i32::MIN, 100, -100);
48697 let b = _mm256_set1_epi32(-1);
48698 let r = _mm256_cmplt_epi32_mask(a, b);
48699 assert_eq!(r, 0b00000101);
48700 }
48701
48702 #[simd_test(enable = "avx512f,avx512vl")]
48703 unsafe fn test_mm256_mask_cmplt_epi32_mask() {
48704 let a = _mm256_set_epi32(0, 1, -1, 101, i32::MAX, i32::MIN, 100, -100);
48705 let b = _mm256_set1_epi32(-1);
48706 let mask = 0b11111111;
48707 let r = _mm256_mask_cmplt_epi32_mask(mask, a, b);
48708 assert_eq!(r, 0b00000101);
48709 }
48710
48711 #[simd_test(enable = "avx512f,avx512vl")]
48712 unsafe fn test_mm_cmplt_epi32_mask() {
48713 let a = _mm_set_epi32(i32::MAX, i32::MIN, 100, -100);
48714 let b = _mm_set1_epi32(-1);
48715 let r = _mm_cmplt_epi32_mask(a, b);
48716 assert_eq!(r, 0b00000101);
48717 }
48718
48719 #[simd_test(enable = "avx512f,avx512vl")]
48720 unsafe fn test_mm_mask_cmplt_epi32_mask() {
48721 let a = _mm_set_epi32(i32::MAX, i32::MIN, 100, -100);
48722 let b = _mm_set1_epi32(-1);
48723 let mask = 0b11111111;
48724 let r = _mm_mask_cmplt_epi32_mask(mask, a, b);
48725 assert_eq!(r, 0b00000101);
48726 }
48727
48728 #[simd_test(enable = "avx512f")]
48729 unsafe fn test_mm512_cmpgt_epi32_mask() {
48730 #[rustfmt::skip]
48731 let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
48732 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
48733 let b = _mm512_set1_epi32(-1);
48734 let m = _mm512_cmpgt_epi32_mask(b, a);
48735 assert_eq!(m, 0b00000101_00000101);
48736 }
48737
48738 #[simd_test(enable = "avx512f")]
48739 unsafe fn test_mm512_mask_cmpgt_epi32_mask() {
48740 #[rustfmt::skip]
48741 let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
48742 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
48743 let b = _mm512_set1_epi32(-1);
48744 let mask = 0b01100110_01100110;
48745 let r = _mm512_mask_cmpgt_epi32_mask(mask, b, a);
48746 assert_eq!(r, 0b00000100_00000100);
48747 }
48748
48749 #[simd_test(enable = "avx512f,avx512vl")]
48750 unsafe fn test_mm256_cmpgt_epi32_mask() {
48751 let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
48752 let b = _mm256_set1_epi32(-1);
48753 let r = _mm256_cmpgt_epi32_mask(a, b);
48754 assert_eq!(r, 0b11011010);
48755 }
48756
48757 #[simd_test(enable = "avx512f,avx512vl")]
48758 unsafe fn test_mm256_mask_cmpgt_epi32_mask() {
48759 let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
48760 let b = _mm256_set1_epi32(-1);
48761 let mask = 0b11111111;
48762 let r = _mm256_mask_cmpgt_epi32_mask(mask, a, b);
48763 assert_eq!(r, 0b11011010);
48764 }
48765
48766 #[simd_test(enable = "avx512f,avx512vl")]
48767 unsafe fn test_mm_cmpgt_epi32_mask() {
48768 let a = _mm_set_epi32(0, 1, -1, 13);
48769 let b = _mm_set1_epi32(-1);
48770 let r = _mm_cmpgt_epi32_mask(a, b);
48771 assert_eq!(r, 0b00001101);
48772 }
48773
48774 #[simd_test(enable = "avx512f,avx512vl")]
48775 unsafe fn test_mm_mask_cmpgt_epi32_mask() {
48776 let a = _mm_set_epi32(0, 1, -1, 13);
48777 let b = _mm_set1_epi32(-1);
48778 let mask = 0b11111111;
48779 let r = _mm_mask_cmpgt_epi32_mask(mask, a, b);
48780 assert_eq!(r, 0b00001101);
48781 }
48782
48783 #[simd_test(enable = "avx512f")]
48784 unsafe fn test_mm512_cmple_epi32_mask() {
48785 #[rustfmt::skip]
48786 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
48787 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
48788 let b = _mm512_set1_epi32(-1);
48789 assert_eq!(
48790 _mm512_cmple_epi32_mask(a, b),
48791 !_mm512_cmpgt_epi32_mask(a, b)
48792 )
48793 }
48794
48795 #[simd_test(enable = "avx512f")]
48796 unsafe fn test_mm512_mask_cmple_epi32_mask() {
48797 #[rustfmt::skip]
48798 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
48799 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
48800 let b = _mm512_set1_epi32(-1);
48801 let mask = 0b01111010_01111010;
48802 assert_eq!(_mm512_mask_cmple_epi32_mask(mask, a, b), 0b00110000_00110000);
48803 }
48804
48805 #[simd_test(enable = "avx512f,avx512vl")]
48806 unsafe fn test_mm256_cmple_epi32_mask() {
48807 let a = _mm256_set_epi32(0, 1, -1, 200, i32::MAX, i32::MIN, 100, -100);
48808 let b = _mm256_set1_epi32(-1);
48809 let r = _mm256_cmple_epi32_mask(a, b);
48810 assert_eq!(r, 0b00100101)
48811 }
48812
48813 #[simd_test(enable = "avx512f,avx512vl")]
48814 unsafe fn test_mm256_mask_cmple_epi32_mask() {
48815 let a = _mm256_set_epi32(0, 1, -1, 200, i32::MAX, i32::MIN, 100, -100);
48816 let b = _mm256_set1_epi32(-1);
48817 let mask = 0b11111111;
48818 let r = _mm256_mask_cmple_epi32_mask(mask, a, b);
48819 assert_eq!(r, 0b00100101)
48820 }
48821
48822 #[simd_test(enable = "avx512f,avx512vl")]
48823 unsafe fn test_mm_cmple_epi32_mask() {
48824 let a = _mm_set_epi32(0, 1, -1, 200);
48825 let b = _mm_set1_epi32(-1);
48826 let r = _mm_cmple_epi32_mask(a, b);
48827 assert_eq!(r, 0b00000010)
48828 }
48829
48830 #[simd_test(enable = "avx512f,avx512vl")]
48831 unsafe fn test_mm_mask_cmple_epi32_mask() {
48832 let a = _mm_set_epi32(0, 1, -1, 200);
48833 let b = _mm_set1_epi32(-1);
48834 let mask = 0b11111111;
48835 let r = _mm_mask_cmple_epi32_mask(mask, a, b);
48836 assert_eq!(r, 0b00000010)
48837 }
48838
48839 #[simd_test(enable = "avx512f")]
48840 unsafe fn test_mm512_cmpge_epi32_mask() {
48841 #[rustfmt::skip]
48842 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
48843 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
48844 let b = _mm512_set1_epi32(-1);
48845 assert_eq!(
48846 _mm512_cmpge_epi32_mask(a, b),
48847 !_mm512_cmplt_epi32_mask(a, b)
48848 )
48849 }
48850
48851 #[simd_test(enable = "avx512f")]
48852 unsafe fn test_mm512_mask_cmpge_epi32_mask() {
48853 #[rustfmt::skip]
48854 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
48855 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
48856 let b = _mm512_set1_epi32(-1);
48857 let mask = 0b01111010_01111010;
48858 assert_eq!(
48859 _mm512_mask_cmpge_epi32_mask(mask, a, b),
48860 0b01111010_01111010
48861 );
48862 }
48863
48864 #[simd_test(enable = "avx512f,avx512vl")]
48865 unsafe fn test_mm256_cmpge_epi32_mask() {
48866 let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
48867 let b = _mm256_set1_epi32(-1);
48868 let r = _mm256_cmpge_epi32_mask(a, b);
48869 assert_eq!(r, 0b11111010)
48870 }
48871
48872 #[simd_test(enable = "avx512f,avx512vl")]
48873 unsafe fn test_mm256_mask_cmpge_epi32_mask() {
48874 let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
48875 let b = _mm256_set1_epi32(-1);
48876 let mask = 0b11111111;
48877 let r = _mm256_mask_cmpge_epi32_mask(mask, a, b);
48878 assert_eq!(r, 0b11111010)
48879 }
48880
48881 #[simd_test(enable = "avx512f,avx512vl")]
48882 unsafe fn test_mm_cmpge_epi32_mask() {
48883 let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
48884 let b = _mm_set1_epi32(-1);
48885 let r = _mm_cmpge_epi32_mask(a, b);
48886 assert_eq!(r, 0b00001111)
48887 }
48888
48889 #[simd_test(enable = "avx512f,avx512vl")]
48890 unsafe fn test_mm_mask_cmpge_epi32_mask() {
48891 let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
48892 let b = _mm_set1_epi32(-1);
48893 let mask = 0b11111111;
48894 let r = _mm_mask_cmpge_epi32_mask(mask, a, b);
48895 assert_eq!(r, 0b00001111)
48896 }
48897
48898 #[simd_test(enable = "avx512f")]
48899 unsafe fn test_mm512_cmpeq_epi32_mask() {
48900 #[rustfmt::skip]
48901 let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
48902 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
48903 #[rustfmt::skip]
48904 let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
48905 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
48906 let m = _mm512_cmpeq_epi32_mask(b, a);
48907 assert_eq!(m, 0b11001111_11001111);
48908 }
48909
48910 #[simd_test(enable = "avx512f")]
48911 unsafe fn test_mm512_mask_cmpeq_epi32_mask() {
48912 #[rustfmt::skip]
48913 let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
48914 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
48915 #[rustfmt::skip]
48916 let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
48917 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
48918 let mask = 0b01111010_01111010;
48919 let r = _mm512_mask_cmpeq_epi32_mask(mask, b, a);
48920 assert_eq!(r, 0b01001010_01001010);
48921 }
48922
48923 #[simd_test(enable = "avx512f,avx512vl")]
48924 unsafe fn test_mm256_cmpeq_epi32_mask() {
48925 let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
48926 let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
48927 let m = _mm256_cmpeq_epi32_mask(b, a);
48928 assert_eq!(m, 0b11001111);
48929 }
48930
48931 #[simd_test(enable = "avx512f,avx512vl")]
48932 unsafe fn test_mm256_mask_cmpeq_epi32_mask() {
48933 let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
48934 let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
48935 let mask = 0b01111010;
48936 let r = _mm256_mask_cmpeq_epi32_mask(mask, b, a);
48937 assert_eq!(r, 0b01001010);
48938 }
48939
48940 #[simd_test(enable = "avx512f,avx512vl")]
48941 unsafe fn test_mm_cmpeq_epi32_mask() {
48942 let a = _mm_set_epi32(0, 1, -1, 13);
48943 let b = _mm_set_epi32(0, 1, 13, 42);
48944 let m = _mm_cmpeq_epi32_mask(b, a);
48945 assert_eq!(m, 0b00001100);
48946 }
48947
48948 #[simd_test(enable = "avx512f,avx512vl")]
48949 unsafe fn test_mm_mask_cmpeq_epi32_mask() {
48950 let a = _mm_set_epi32(0, 1, -1, 13);
48951 let b = _mm_set_epi32(0, 1, 13, 42);
48952 let mask = 0b11111111;
48953 let r = _mm_mask_cmpeq_epi32_mask(mask, b, a);
48954 assert_eq!(r, 0b00001100);
48955 }
48956
48957 #[simd_test(enable = "avx512f")]
48958 unsafe fn test_mm512_cmpneq_epi32_mask() {
48959 #[rustfmt::skip]
48960 let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
48961 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
48962 #[rustfmt::skip]
48963 let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
48964 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
48965 let m = _mm512_cmpneq_epi32_mask(b, a);
48966 assert_eq!(m, !_mm512_cmpeq_epi32_mask(b, a));
48967 }
48968
48969 #[simd_test(enable = "avx512f")]
48970 unsafe fn test_mm512_mask_cmpneq_epi32_mask() {
48971 #[rustfmt::skip]
48972 let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100,
48973 0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100);
48974 #[rustfmt::skip]
48975 let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
48976 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
48977 let mask = 0b01111010_01111010;
48978 let r = _mm512_mask_cmpneq_epi32_mask(mask, b, a);
48979 assert_eq!(r, 0b00110010_00110010)
48980 }
48981
48982 #[simd_test(enable = "avx512f,avx512vl")]
48983 unsafe fn test_mm256_cmpneq_epi32_mask() {
48984 let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
48985 let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
48986 let m = _mm256_cmpneq_epi32_mask(b, a);
48987 assert_eq!(m, !_mm256_cmpeq_epi32_mask(b, a));
48988 }
48989
48990 #[simd_test(enable = "avx512f,avx512vl")]
48991 unsafe fn test_mm256_mask_cmpneq_epi32_mask() {
48992 let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100);
48993 let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
48994 let mask = 0b11111111;
48995 let r = _mm256_mask_cmpneq_epi32_mask(mask, b, a);
48996 assert_eq!(r, 0b00110011)
48997 }
48998
48999 #[simd_test(enable = "avx512f,avx512vl")]
49000 unsafe fn test_mm_cmpneq_epi32_mask() {
49001 let a = _mm_set_epi32(0, 1, -1, 13);
49002 let b = _mm_set_epi32(0, 1, 13, 42);
49003 let r = _mm_cmpneq_epi32_mask(b, a);
49004 assert_eq!(r, 0b00000011)
49005 }
49006
49007 #[simd_test(enable = "avx512f,avx512vl")]
49008 unsafe fn test_mm_mask_cmpneq_epi32_mask() {
49009 let a = _mm_set_epi32(0, 1, -1, 13);
49010 let b = _mm_set_epi32(0, 1, 13, 42);
49011 let mask = 0b11111111;
49012 let r = _mm_mask_cmpneq_epi32_mask(mask, b, a);
49013 assert_eq!(r, 0b00000011)
49014 }
49015
49016 #[simd_test(enable = "avx512f")]
49017 unsafe fn test_mm512_cmp_epi32_mask() {
49018 #[rustfmt::skip]
49019 let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
49020 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
49021 let b = _mm512_set1_epi32(-1);
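// _MM_CMPINT_LT sets mask bit i when a[i] < b[i]; only the -100 and i32::MIN lanes compare below -1.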
49022 let m = _mm512_cmp_epi32_mask::<_MM_CMPINT_LT>(a, b);
49023 assert_eq!(m, 0b00000101_00000101);
49024 }
49025
49026 #[simd_test(enable = "avx512f")]
49027 unsafe fn test_mm512_mask_cmp_epi32_mask() {
49028 #[rustfmt::skip]
49029 let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
49030 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
49031 let b = _mm512_set1_epi32(-1);
49032 let mask = 0b01100110_01100110;
49033 let r = _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(mask, a, b);
49034 assert_eq!(r, 0b00000100_00000100);
49035 }
49036
49037 #[simd_test(enable = "avx512f,avx512vl")]
49038 unsafe fn test_mm256_cmp_epi32_mask() {
49039 let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
49040 let b = _mm256_set1_epi32(-1);
49041 let m = _mm256_cmp_epi32_mask::<_MM_CMPINT_LT>(a, b);
49042 assert_eq!(m, 0b00000101);
49043 }
49044
49045 #[simd_test(enable = "avx512f,avx512vl")]
49046 unsafe fn test_mm256_mask_cmp_epi32_mask() {
49047 let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
49048 let b = _mm256_set1_epi32(-1);
49049 let mask = 0b01100110;
49050 let r = _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(mask, a, b);
49051 assert_eq!(r, 0b00000100);
49052 }
49053
49054 #[simd_test(enable = "avx512f,avx512vl")]
49055 unsafe fn test_mm_cmp_epi32_mask() {
49056 let a = _mm_set_epi32(0, 1, -1, 13);
49057 let b = _mm_set1_epi32(1);
49058 let m = _mm_cmp_epi32_mask::<_MM_CMPINT_LT>(a, b);
49059 assert_eq!(m, 0b00001010);
49060 }
49061
49062 #[simd_test(enable = "avx512f,avx512vl")]
49063 unsafe fn test_mm_mask_cmp_epi32_mask() {
49064 let a = _mm_set_epi32(0, 1, -1, 13);
49065 let b = _mm_set1_epi32(1);
49066 let mask = 0b11111111;
49067 let r = _mm_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(mask, a, b);
49068 assert_eq!(r, 0b00001010);
49069 }
49070
49071 #[simd_test(enable = "avx512f")]
49072 unsafe fn test_mm512_set_epi8() {
49073 let r = _mm512_set1_epi8(2);
49074 assert_eq_m512i(
49075 r,
49076 _mm512_set_epi8(
49077 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
49078 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
49079 2, 2, 2, 2, 2, 2, 2, 2,
49080 ),
49081 )
49082 }
49083
49084 #[simd_test(enable = "avx512f")]
49085 unsafe fn test_mm512_set_epi16() {
49086 let r = _mm512_set1_epi16(2);
49087 assert_eq_m512i(
49088 r,
49089 _mm512_set_epi16(
49090 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
49091 2, 2, 2, 2,
49092 ),
49093 )
49094 }
49095
49096 #[simd_test(enable = "avx512f")]
49097 unsafe fn test_mm512_set_epi32() {
49098 let r = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
49099 assert_eq_m512i(
49100 r,
49101 _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),
49102 )
49103 }
49104
49105 #[simd_test(enable = "avx512f")]
49106 unsafe fn test_mm512_setr_epi32() {
49107 let r = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
49108 assert_eq_m512i(
49109 r,
49110 _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),
49111 )
49112 }
49113
49114 #[simd_test(enable = "avx512f")]
49115 unsafe fn test_mm512_set1_epi8() {
49116 let r = _mm512_set_epi8(
49117 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
49118 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
49119 2, 2, 2, 2, 2, 2,
49120 );
49121 assert_eq_m512i(r, _mm512_set1_epi8(2));
49122 }
49123
49124 #[simd_test(enable = "avx512f")]
49125 unsafe fn test_mm512_set1_epi16() {
49126 let r = _mm512_set_epi16(
49127 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
49128 2, 2, 2,
49129 );
49130 assert_eq_m512i(r, _mm512_set1_epi16(2));
49131 }
49132
49133 #[simd_test(enable = "avx512f")]
49134 unsafe fn test_mm512_set1_epi32() {
49135 let r = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
49136 assert_eq_m512i(r, _mm512_set1_epi32(2));
49137 }
49138
49139 #[simd_test(enable = "avx512f")]
49140 unsafe fn test_mm512_setzero_si512() {
49141 assert_eq_m512i(_mm512_set1_epi32(0), _mm512_setzero_si512());
49142 }
49143
49144 #[simd_test(enable = "avx512f")]
49145 unsafe fn test_mm512_setzero_epi32() {
49146 assert_eq_m512i(_mm512_set1_epi32(0), _mm512_setzero_epi32());
49147 }
49148
49149 #[simd_test(enable = "avx512f")]
49150 unsafe fn test_mm512_set_ps() {
49151 let r = _mm512_setr_ps(
49152 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
49153 );
49154 assert_eq_m512(
49155 r,
49156 _mm512_set_ps(
49157 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
49158 ),
49159 )
49160 }
49161
49162 #[simd_test(enable = "avx512f")]
49163 unsafe fn test_mm512_setr_ps() {
49164 let r = _mm512_set_ps(
49165 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
49166 );
49167 assert_eq_m512(
49168 r,
49169 _mm512_setr_ps(
49170 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
49171 ),
49172 )
49173 }
49174
49175 #[simd_test(enable = "avx512f")]
49176 unsafe fn test_mm512_set1_ps() {
49177 #[rustfmt::skip]
49178 let expected = _mm512_set_ps(2., 2., 2., 2., 2., 2., 2., 2.,
49179 2., 2., 2., 2., 2., 2., 2., 2.);
49180 assert_eq_m512(expected, _mm512_set1_ps(2.));
49181 }
49182
49183 #[simd_test(enable = "avx512f")]
49184 unsafe fn test_mm512_set4_epi32() {
49185 let r = _mm512_set_epi32(4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1);
49186 assert_eq_m512i(r, _mm512_set4_epi32(4, 3, 2, 1));
49187 }
49188
49189 #[simd_test(enable = "avx512f")]
49190 unsafe fn test_mm512_set4_ps() {
49191 let r = _mm512_set_ps(
49192 4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1.,
49193 );
49194 assert_eq_m512(r, _mm512_set4_ps(4., 3., 2., 1.));
49195 }
49196
49197 #[simd_test(enable = "avx512f")]
49198 unsafe fn test_mm512_setr4_epi32() {
49199 let r = _mm512_set_epi32(4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1);
49200 assert_eq_m512i(r, _mm512_setr4_epi32(1, 2, 3, 4));
49201 }
49202
49203 #[simd_test(enable = "avx512f")]
49204 unsafe fn test_mm512_setr4_ps() {
49205 let r = _mm512_set_ps(
49206 4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1.,
49207 );
49208 assert_eq_m512(r, _mm512_setr4_ps(1., 2., 3., 4.));
49209 }
49210
49211 #[simd_test(enable = "avx512f")]
49212 unsafe fn test_mm512_setzero_ps() {
49213 assert_eq_m512(_mm512_setzero_ps(), _mm512_set1_ps(0.));
49214 }
49215
49216 #[simd_test(enable = "avx512f")]
49217 unsafe fn test_mm512_setzero() {
49218 assert_eq_m512(_mm512_setzero(), _mm512_set1_ps(0.));
49219 }
49220
49221 #[simd_test(enable = "avx512f")]
49222 unsafe fn test_mm512_loadu_pd() {
49223 let a = &[4., 3., 2., 5., 8., 9., 64., 50.];
49224 let p = a.as_ptr();
49225 let r = _mm512_loadu_pd(black_box(p));
49226 let e = _mm512_setr_pd(4., 3., 2., 5., 8., 9., 64., 50.);
49227 assert_eq_m512d(r, e);
49228 }
49229
49230 #[simd_test(enable = "avx512f")]
49231 unsafe fn test_mm512_storeu_pd() {
49232 let a = _mm512_set1_pd(9.);
49233 let mut r = _mm512_undefined_pd();
49234 _mm512_storeu_pd(&mut r as *mut _ as *mut f64, a);
49235 assert_eq_m512d(r, a);
49236 }
49237
49238 #[simd_test(enable = "avx512f")]
49239 unsafe fn test_mm512_loadu_ps() {
49240 let a = &[
49241 4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
49242 ];
49243 let p = a.as_ptr();
49244 let r = _mm512_loadu_ps(black_box(p));
49245 let e = _mm512_setr_ps(
49246 4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
49247 );
49248 assert_eq_m512(r, e);
49249 }
49250
49251 #[simd_test(enable = "avx512f")]
49252 unsafe fn test_mm512_storeu_ps() {
49253 let a = _mm512_set1_ps(9.);
49254 let mut r = _mm512_undefined_ps();
49255 _mm512_storeu_ps(&mut r as *mut _ as *mut f32, a);
49256 assert_eq_m512(r, a);
49257 }
49258
49259 #[simd_test(enable = "avx512f")]
49260 unsafe fn test_mm512_mask_loadu_epi32() {
49261 let src = _mm512_set1_epi32(42);
49262 let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
49263 let p = a.as_ptr();
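// Mask bit i (least significant bit first) controls element i: set bits load from memory, clear bits keep the value from src.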
49264 let m = 0b11101000_11001010;
49265 let r = _mm512_mask_loadu_epi32(src, m, black_box(p));
49266 let e = _mm512_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16);
49267 assert_eq_m512i(r, e);
49268 }
49269
49270 #[simd_test(enable = "avx512f")]
49271 unsafe fn test_mm512_maskz_loadu_epi32() {
49272 let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
49273 let p = a.as_ptr();
49274 let m = 0b11101000_11001010;
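// Unlike the mask_ variant, maskz_ zeroes every lane whose mask bit is clear instead of copying from a src vector.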
49275 let r = _mm512_maskz_loadu_epi32(m, black_box(p));
49276 let e = _mm512_setr_epi32(0, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16);
49277 assert_eq_m512i(r, e);
49278 }
49279
49280 #[simd_test(enable = "avx512f")]
49281 unsafe fn test_mm512_mask_load_epi32() {
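// The aligned load form requires a 64-byte-aligned source, hence the over-aligned wrapper struct below.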
49282 #[repr(align(64))]
49283 struct Align {
49284 data: [i32; 16], // 64 bytes
49285 }
49286 let src = _mm512_set1_epi32(42);
49287 let a = Align {
49288 data: [1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
49289 };
49290 let p = a.data.as_ptr();
49291 let m = 0b11101000_11001010;
49292 let r = _mm512_mask_load_epi32(src, m, black_box(p));
49293 let e = _mm512_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16);
49294 assert_eq_m512i(r, e);
49295 }
49296
49297 #[simd_test(enable = "avx512f")]
49298 unsafe fn test_mm512_maskz_load_epi32() {
49299 #[repr(align(64))]
49300 struct Align {
49301 data: [i32; 16], // 64 bytes
49302 }
49303 let a = Align {
49304 data: [1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
49305 };
49306 let p = a.data.as_ptr();
49307 let m = 0b11101000_11001010;
49308 let r = _mm512_maskz_load_epi32(m, black_box(p));
49309 let e = _mm512_setr_epi32(0, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16);
49310 assert_eq_m512i(r, e);
49311 }
49312
49313 #[simd_test(enable = "avx512f")]
49314 unsafe fn test_mm512_mask_storeu_epi32() {
49315 let mut r = [42_i32; 16];
49316 let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
49317 let m = 0b11101000_11001010;
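// Only lanes whose mask bit is set are written back; the remaining slots of r keep their initial 42s.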
49318 _mm512_mask_storeu_epi32(r.as_mut_ptr(), m, a);
49319 let e = _mm512_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16);
49320 assert_eq_m512i(_mm512_loadu_epi32(r.as_ptr()), e);
49321 }
49322
49323 #[simd_test(enable = "avx512f")]
49324 unsafe fn test_mm512_mask_store_epi32() {
49325 #[repr(align(64))]
49326 struct Align {
49327 data: [i32; 16],
49328 }
49329 let mut r = Align { data: [42; 16] };
49330 let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
49331 let m = 0b11101000_11001010;
49332 _mm512_mask_store_epi32(r.data.as_mut_ptr(), m, a);
49333 let e = _mm512_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16);
49334 assert_eq_m512i(_mm512_load_epi32(r.data.as_ptr()), e);
49335 }
49336
49337 #[simd_test(enable = "avx512f")]
49338 unsafe fn test_mm512_mask_loadu_epi64() {
49339 let src = _mm512_set1_epi64(42);
49340 let a = &[1_i64, 2, 3, 4, 5, 6, 7, 8];
49341 let p = a.as_ptr();
49342 let m = 0b11001010;
49343 let r = _mm512_mask_loadu_epi64(src, m, black_box(p));
49344 let e = _mm512_setr_epi64(42, 2, 42, 4, 42, 42, 7, 8);
49345 assert_eq_m512i(r, e);
49346 }
49347
49348 #[simd_test(enable = "avx512f")]
49349 unsafe fn test_mm512_maskz_loadu_epi64() {
49350 let a = &[1_i64, 2, 3, 4, 5, 6, 7, 8];
49351 let p = a.as_ptr();
49352 let m = 0b11001010;
49353 let r = _mm512_maskz_loadu_epi64(m, black_box(p));
49354 let e = _mm512_setr_epi64(0, 2, 0, 4, 0, 0, 7, 8);
49355 assert_eq_m512i(r, e);
49356 }
49357
49358 #[simd_test(enable = "avx512f")]
49359 unsafe fn test_mm512_mask_load_epi64() {
49360 #[repr(align(64))]
49361 struct Align {
49362 data: [i64; 8], // 64 bytes
49363 }
49364 let src = _mm512_set1_epi64(42);
49365 let a = Align {
49366 data: [1_i64, 2, 3, 4, 5, 6, 7, 8],
49367 };
49368 let p = a.data.as_ptr();
49369 let m = 0b11001010;
49370 let r = _mm512_mask_load_epi64(src, m, black_box(p));
49371 let e = _mm512_setr_epi64(42, 2, 42, 4, 42, 42, 7, 8);
49372 assert_eq_m512i(r, e);
49373 }
49374
49375 #[simd_test(enable = "avx512f")]
49376 unsafe fn test_mm512_maskz_load_epi64() {
49377 #[repr(align(64))]
49378 struct Align {
49379 data: [i64; 8], // 64 bytes
49380 }
49381 let a = Align {
49382 data: [1_i64, 2, 3, 4, 5, 6, 7, 8],
49383 };
49384 let p = a.data.as_ptr();
49385 let m = 0b11001010;
49386 let r = _mm512_maskz_load_epi64(m, black_box(p));
49387 let e = _mm512_setr_epi64(0, 2, 0, 4, 0, 0, 7, 8);
49388 assert_eq_m512i(r, e);
49389 }
49390
49391 #[simd_test(enable = "avx512f")]
49392 unsafe fn test_mm512_mask_storeu_epi64() {
49393 let mut r = [42_i64; 8];
49394 let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
49395 let m = 0b11001010;
49396 _mm512_mask_storeu_epi64(r.as_mut_ptr(), m, a);
49397 let e = _mm512_setr_epi64(42, 2, 42, 4, 42, 42, 7, 8);
49398 assert_eq_m512i(_mm512_loadu_epi64(r.as_ptr()), e);
49399 }
49400
49401 #[simd_test(enable = "avx512f")]
49402 unsafe fn test_mm512_mask_store_epi64() {
49403 #[repr(align(64))]
49404 struct Align {
49405 data: [i64; 8],
49406 }
49407 let mut r = Align { data: [42; 8] };
49408 let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
49409 let m = 0b11001010;
49410 let p = r.data.as_mut_ptr();
49411 _mm512_mask_store_epi64(p, m, a);
49412 let e = _mm512_setr_epi64(42, 2, 42, 4, 42, 42, 7, 8);
49413 assert_eq_m512i(_mm512_load_epi64(r.data.as_ptr()), e);
49414 }
49415
49416 #[simd_test(enable = "avx512f")]
49417 unsafe fn test_mm512_mask_loadu_ps() {
49418 let src = _mm512_set1_ps(42.0);
49419 let a = &[
49420 1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
49421 16.0,
49422 ];
49423 let p = a.as_ptr();
49424 let m = 0b11101000_11001010;
49425 let r = _mm512_mask_loadu_ps(src, m, black_box(p));
49426 let e = _mm512_setr_ps(
49427 42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0, 42.0, 42.0, 42.0, 12.0, 42.0, 14.0, 15.0,
49428 16.0,
49429 );
49430 assert_eq_m512(r, e);
49431 }
49432
49433 #[simd_test(enable = "avx512f")]
49434 unsafe fn test_mm512_maskz_loadu_ps() {
49435 let a = &[
49436 1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
49437 16.0,
49438 ];
49439 let p = a.as_ptr();
49440 let m = 0b11101000_11001010;
49441 let r = _mm512_maskz_loadu_ps(m, black_box(p));
49442 let e = _mm512_setr_ps(
49443 0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0, 0.0, 0.0, 0.0, 12.0, 0.0, 14.0, 15.0, 16.0,
49444 );
49445 assert_eq_m512(r, e);
49446 }
49447
49448 #[simd_test(enable = "avx512f")]
49449 unsafe fn test_mm512_mask_load_ps() {
49450 #[repr(align(64))]
49451 struct Align {
49452 data: [f32; 16], // 64 bytes
49453 }
49454 let src = _mm512_set1_ps(42.0);
49455 let a = Align {
49456 data: [
49457 1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0,
49458 15.0, 16.0,
49459 ],
49460 };
49461 let p = a.data.as_ptr();
49462 let m = 0b11101000_11001010;
49463 let r = _mm512_mask_load_ps(src, m, black_box(p));
49464 let e = _mm512_setr_ps(
49465 42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0, 42.0, 42.0, 42.0, 12.0, 42.0, 14.0, 15.0,
49466 16.0,
49467 );
49468 assert_eq_m512(r, e);
49469 }
49470
49471 #[simd_test(enable = "avx512f")]
49472 unsafe fn test_mm512_maskz_load_ps() {
49473 #[repr(align(64))]
49474 struct Align {
49475 data: [f32; 16], // 64 bytes
49476 }
49477 let a = Align {
49478 data: [
49479 1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0,
49480 15.0, 16.0,
49481 ],
49482 };
49483 let p = a.data.as_ptr();
49484 let m = 0b11101000_11001010;
49485 let r = _mm512_maskz_load_ps(m, black_box(p));
49486 let e = _mm512_setr_ps(
49487 0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0, 0.0, 0.0, 0.0, 12.0, 0.0, 14.0, 15.0, 16.0,
49488 );
49489 assert_eq_m512(r, e);
49490 }
49491
49492 #[simd_test(enable = "avx512f")]
49493 unsafe fn test_mm512_mask_storeu_ps() {
49494 let mut r = [42_f32; 16];
49495 let a = _mm512_setr_ps(
49496 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
49497 );
49498 let m = 0b11101000_11001010;
49499 _mm512_mask_storeu_ps(r.as_mut_ptr(), m, a);
49500 let e = _mm512_setr_ps(
49501 42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0, 42.0, 42.0, 42.0, 12.0, 42.0, 14.0, 15.0,
49502 16.0,
49503 );
49504 assert_eq_m512(_mm512_loadu_ps(r.as_ptr()), e);
49505 }
49506
49507 #[simd_test(enable = "avx512f")]
49508 unsafe fn test_mm512_mask_store_ps() {
49509 #[repr(align(64))]
49510 struct Align {
49511 data: [f32; 16],
49512 }
49513 let mut r = Align { data: [42.0; 16] };
49514 let a = _mm512_setr_ps(
49515 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
49516 );
49517 let m = 0b11101000_11001010;
49518 _mm512_mask_store_ps(r.data.as_mut_ptr(), m, a);
49519 let e = _mm512_setr_ps(
49520 42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0, 42.0, 42.0, 42.0, 12.0, 42.0, 14.0, 15.0,
49521 16.0,
49522 );
49523 assert_eq_m512(_mm512_load_ps(r.data.as_ptr()), e);
49524 }
49525
49526 #[simd_test(enable = "avx512f")]
49527 unsafe fn test_mm512_mask_loadu_pd() {
49528 let src = _mm512_set1_pd(42.0);
49529 let a = &[1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
49530 let p = a.as_ptr();
49531 let m = 0b11001010;
49532 let r = _mm512_mask_loadu_pd(src, m, black_box(p));
49533 let e = _mm512_setr_pd(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
49534 assert_eq_m512d(r, e);
49535 }
49536
49537 #[simd_test(enable = "avx512f")]
49538 unsafe fn test_mm512_maskz_loadu_pd() {
49539 let a = &[1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
49540 let p = a.as_ptr();
49541 let m = 0b11001010;
49542 let r = _mm512_maskz_loadu_pd(m, black_box(p));
49543 let e = _mm512_setr_pd(0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0);
49544 assert_eq_m512d(r, e);
49545 }
49546
49547 #[simd_test(enable = "avx512f")]
49548 unsafe fn test_mm512_mask_load_pd() {
49549 #[repr(align(64))]
49550 struct Align {
49551 data: [f64; 8], // 64 bytes
49552 }
49553 let src = _mm512_set1_pd(42.0);
49554 let a = Align {
49555 data: [1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
49556 };
49557 let p = a.data.as_ptr();
49558 let m = 0b11001010;
49559 let r = _mm512_mask_load_pd(src, m, black_box(p));
49560 let e = _mm512_setr_pd(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
49561 assert_eq_m512d(r, e);
49562 }
49563
49564 #[simd_test(enable = "avx512f")]
49565 unsafe fn test_mm512_maskz_load_pd() {
49566 #[repr(align(64))]
49567 struct Align {
49568 data: [f64; 8], // 64 bytes
49569 }
49570 let a = Align {
49571 data: [1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
49572 };
49573 let p = a.data.as_ptr();
49574 let m = 0b11001010;
49575 let r = _mm512_maskz_load_pd(m, black_box(p));
49576 let e = _mm512_setr_pd(0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0);
49577 assert_eq_m512d(r, e);
49578 }
49579
49580 #[simd_test(enable = "avx512f")]
49581 unsafe fn test_mm512_mask_storeu_pd() {
49582 let mut r = [42_f64; 8];
49583 let a = _mm512_setr_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
49584 let m = 0b11001010;
49585 _mm512_mask_storeu_pd(r.as_mut_ptr(), m, a);
49586 let e = _mm512_setr_pd(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
49587 assert_eq_m512d(_mm512_loadu_pd(r.as_ptr()), e);
49588 }
49589
49590 #[simd_test(enable = "avx512f")]
49591 unsafe fn test_mm512_mask_store_pd() {
49592 #[repr(align(64))]
49593 struct Align {
49594 data: [f64; 8],
49595 }
49596 let mut r = Align { data: [42.0; 8] };
49597 let a = _mm512_setr_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
49598 let m = 0b11001010;
49599 _mm512_mask_store_pd(r.data.as_mut_ptr(), m, a);
49600 let e = _mm512_setr_pd(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
49601 assert_eq_m512d(_mm512_load_pd(r.data.as_ptr()), e);
49602 }
49603
49604 #[simd_test(enable = "avx512f,avx512vl")]
49605 unsafe fn test_mm256_mask_loadu_epi32() {
49606 let src = _mm256_set1_epi32(42);
49607 let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8];
49608 let p = a.as_ptr();
49609 let m = 0b11001010;
49610 let r = _mm256_mask_loadu_epi32(src, m, black_box(p));
49611 let e = _mm256_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8);
49612 assert_eq_m256i(r, e);
49613 }
49614
49615 #[simd_test(enable = "avx512f,avx512vl")]
49616 unsafe fn test_mm256_maskz_loadu_epi32() {
49617 let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8];
49618 let p = a.as_ptr();
49619 let m = 0b11001010;
49620 let r = _mm256_maskz_loadu_epi32(m, black_box(p));
49621 let e = _mm256_setr_epi32(0, 2, 0, 4, 0, 0, 7, 8);
49622 assert_eq_m256i(r, e);
49623 }
49624
49625 #[simd_test(enable = "avx512f,avx512vl")]
49626 unsafe fn test_mm256_mask_load_epi32() {
49627 #[repr(align(32))]
49628 struct Align {
49629 data: [i32; 8], // 32 bytes
49630 }
49631 let src = _mm256_set1_epi32(42);
49632 let a = Align {
49633 data: [1_i32, 2, 3, 4, 5, 6, 7, 8],
49634 };
49635 let p = a.data.as_ptr();
49636 let m = 0b11001010;
49637 let r = _mm256_mask_load_epi32(src, m, black_box(p));
49638 let e = _mm256_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8);
49639 assert_eq_m256i(r, e);
49640 }
49641
49642 #[simd_test(enable = "avx512f,avx512vl")]
49643 unsafe fn test_mm256_maskz_load_epi32() {
49644 #[repr(align(32))]
49645 struct Align {
49646 data: [i32; 8], // 32 bytes
49647 }
49648 let a = Align {
49649 data: [1_i32, 2, 3, 4, 5, 6, 7, 8],
49650 };
49651 let p = a.data.as_ptr();
49652 let m = 0b11001010;
49653 let r = _mm256_maskz_load_epi32(m, black_box(p));
49654 let e = _mm256_setr_epi32(0, 2, 0, 4, 0, 0, 7, 8);
49655 assert_eq_m256i(r, e);
49656 }
49657
49658 #[simd_test(enable = "avx512f,avx512vl")]
49659 unsafe fn test_mm256_mask_storeu_epi32() {
49660 let mut r = [42_i32; 8];
49661 let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
49662 let m = 0b11001010;
49663 _mm256_mask_storeu_epi32(r.as_mut_ptr(), m, a);
49664 let e = _mm256_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8);
49665 assert_eq_m256i(_mm256_loadu_epi32(r.as_ptr()), e);
49666 }
49667
49668 #[simd_test(enable = "avx512f,avx512vl")]
49669 unsafe fn test_mm256_mask_store_epi32() {
49670 #[repr(align(64))]
49671 struct Align {
49672 data: [i32; 8],
49673 }
49674 let mut r = Align { data: [42; 8] };
49675 let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
49676 let m = 0b11001010;
49677 _mm256_mask_store_epi32(r.data.as_mut_ptr(), m, a);
49678 let e = _mm256_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8);
49679 assert_eq_m256i(_mm256_load_epi32(r.data.as_ptr()), e);
49680 }
49681
49682 #[simd_test(enable = "avx512f,avx512vl")]
49683 unsafe fn test_mm256_mask_loadu_epi64() {
49684 let src = _mm256_set1_epi64x(42);
49685 let a = &[1_i64, 2, 3, 4];
49686 let p = a.as_ptr();
49687 let m = 0b1010;
49688 let r = _mm256_mask_loadu_epi64(src, m, black_box(p));
49689 let e = _mm256_setr_epi64x(42, 2, 42, 4);
49690 assert_eq_m256i(r, e);
49691 }
49692
49693 #[simd_test(enable = "avx512f,avx512vl")]
49694 unsafe fn test_mm256_maskz_loadu_epi64() {
49695 let a = &[1_i64, 2, 3, 4];
49696 let p = a.as_ptr();
49697 let m = 0b1010;
49698 let r = _mm256_maskz_loadu_epi64(m, black_box(p));
49699 let e = _mm256_setr_epi64x(0, 2, 0, 4);
49700 assert_eq_m256i(r, e);
49701 }
49702
49703 #[simd_test(enable = "avx512f,avx512vl")]
49704 unsafe fn test_mm256_mask_load_epi64() {
49705 #[repr(align(32))]
49706 struct Align {
49707 data: [i64; 4], // 32 bytes
49708 }
49709 let src = _mm256_set1_epi64x(42);
49710 let a = Align {
49711 data: [1_i64, 2, 3, 4],
49712 };
49713 let p = a.data.as_ptr();
49714 let m = 0b1010;
49715 let r = _mm256_mask_load_epi64(src, m, black_box(p));
49716 let e = _mm256_setr_epi64x(42, 2, 42, 4);
49717 assert_eq_m256i(r, e);
49718 }
49719
49720 #[simd_test(enable = "avx512f,avx512vl")]
49721 unsafe fn test_mm256_maskz_load_epi64() {
49722 #[repr(align(32))]
49723 struct Align {
49724 data: [i64; 4], // 32 bytes
49725 }
49726 let a = Align {
49727 data: [1_i64, 2, 3, 4],
49728 };
49729 let p = a.data.as_ptr();
49730 let m = 0b1010;
49731 let r = _mm256_maskz_load_epi64(m, black_box(p));
49732 let e = _mm256_setr_epi64x(0, 2, 0, 4);
49733 assert_eq_m256i(r, e);
49734 }
49735
49736 #[simd_test(enable = "avx512f,avx512vl")]
49737 unsafe fn test_mm256_mask_storeu_epi64() {
49738 let mut r = [42_i64; 4];
49739 let a = _mm256_setr_epi64x(1, 2, 3, 4);
49740 let m = 0b1010;
49741 _mm256_mask_storeu_epi64(r.as_mut_ptr(), m, a);
49742 let e = _mm256_setr_epi64x(42, 2, 42, 4);
49743 assert_eq_m256i(_mm256_loadu_epi64(r.as_ptr()), e);
49744 }
49745
49746 #[simd_test(enable = "avx512f,avx512vl")]
49747 unsafe fn test_mm256_mask_store_epi64() {
49748 #[repr(align(32))]
49749 struct Align {
49750 data: [i64; 4],
49751 }
49752 let mut r = Align { data: [42; 4] };
49753 let a = _mm256_setr_epi64x(1, 2, 3, 4);
49754 let m = 0b1010;
49755 _mm256_mask_store_epi64(r.data.as_mut_ptr(), m, a);
49756 let e = _mm256_setr_epi64x(42, 2, 42, 4);
49757 assert_eq_m256i(_mm256_load_epi64(r.data.as_ptr()), e);
49758 }
49759
49760 #[simd_test(enable = "avx512f,avx512vl")]
49761 unsafe fn test_mm256_mask_loadu_ps() {
49762 let src = _mm256_set1_ps(42.0);
49763 let a = &[1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
49764 let p = a.as_ptr();
49765 let m = 0b11001010;
49766 let r = _mm256_mask_loadu_ps(src, m, black_box(p));
49767 let e = _mm256_setr_ps(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
49768 assert_eq_m256(r, e);
49769 }
49770
49771 #[simd_test(enable = "avx512f,avx512vl")]
49772 unsafe fn test_mm256_maskz_loadu_ps() {
49773 let a = &[1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
49774 let p = a.as_ptr();
49775 let m = 0b11001010;
49776 let r = _mm256_maskz_loadu_ps(m, black_box(p));
49777 let e = _mm256_setr_ps(0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0);
49778 assert_eq_m256(r, e);
49779 }
49780
49781 #[simd_test(enable = "avx512f,avx512vl")]
49782 unsafe fn test_mm256_mask_load_ps() {
49783 #[repr(align(32))]
49784 struct Align {
49785 data: [f32; 8], // 32 bytes
49786 }
49787 let src = _mm256_set1_ps(42.0);
49788 let a = Align {
49789 data: [1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
49790 };
49791 let p = a.data.as_ptr();
49792 let m = 0b11001010;
49793 let r = _mm256_mask_load_ps(src, m, black_box(p));
49794 let e = _mm256_setr_ps(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
49795 assert_eq_m256(r, e);
49796 }
49797
49798 #[simd_test(enable = "avx512f,avx512vl")]
49799 unsafe fn test_mm256_maskz_load_ps() {
49800 #[repr(align(32))]
49801 struct Align {
49802 data: [f32; 8], // 32 bytes
49803 }
49804 let a = Align {
49805 data: [1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
49806 };
49807 let p = a.data.as_ptr();
49808 let m = 0b11001010;
49809 let r = _mm256_maskz_load_ps(m, black_box(p));
49810 let e = _mm256_setr_ps(0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0);
49811 assert_eq_m256(r, e);
49812 }
49813
49814 #[simd_test(enable = "avx512f,avx512vl")]
49815 unsafe fn test_mm256_mask_storeu_ps() {
49816 let mut r = [42_f32; 8];
49817 let a = _mm256_setr_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
49818 let m = 0b11001010;
49819 _mm256_mask_storeu_ps(r.as_mut_ptr(), m, a);
49820 let e = _mm256_setr_ps(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
49821 assert_eq_m256(_mm256_loadu_ps(r.as_ptr()), e);
49822 }
49823
49824 #[simd_test(enable = "avx512f,avx512vl")]
49825 unsafe fn test_mm256_mask_store_ps() {
49826 #[repr(align(32))]
49827 struct Align {
49828 data: [f32; 8],
49829 }
49830 let mut r = Align { data: [42.0; 8] };
49831 let a = _mm256_setr_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
49832 let m = 0b11001010;
49833 _mm256_mask_store_ps(r.data.as_mut_ptr(), m, a);
49834 let e = _mm256_setr_ps(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
49835 assert_eq_m256(_mm256_load_ps(r.data.as_ptr()), e);
49836 }
49837
49838 #[simd_test(enable = "avx512f,avx512vl")]
49839 unsafe fn test_mm256_mask_loadu_pd() {
49840 let src = _mm256_set1_pd(42.0);
49841 let a = &[1.0_f64, 2.0, 3.0, 4.0];
49842 let p = a.as_ptr();
49843 let m = 0b1010;
49844 let r = _mm256_mask_loadu_pd(src, m, black_box(p));
49845 let e = _mm256_setr_pd(42.0, 2.0, 42.0, 4.0);
49846 assert_eq_m256d(r, e);
49847 }
49848
49849 #[simd_test(enable = "avx512f,avx512vl")]
49850 unsafe fn test_mm256_maskz_loadu_pd() {
49851 let a = &[1.0_f64, 2.0, 3.0, 4.0];
49852 let p = a.as_ptr();
49853 let m = 0b1010;
49854 let r = _mm256_maskz_loadu_pd(m, black_box(p));
49855 let e = _mm256_setr_pd(0.0, 2.0, 0.0, 4.0);
49856 assert_eq_m256d(r, e);
49857 }
49858
49859 #[simd_test(enable = "avx512f,avx512vl")]
49860 unsafe fn test_mm256_mask_load_pd() {
49861 #[repr(align(32))]
49862 struct Align {
49863 data: [f64; 4], // 32 bytes
49864 }
49865 let src = _mm256_set1_pd(42.0);
49866 let a = Align {
49867 data: [1.0_f64, 2.0, 3.0, 4.0],
49868 };
49869 let p = a.data.as_ptr();
49870 let m = 0b1010;
49871 let r = _mm256_mask_load_pd(src, m, black_box(p));
49872 let e = _mm256_setr_pd(42.0, 2.0, 42.0, 4.0);
49873 assert_eq_m256d(r, e);
49874 }
49875
49876 #[simd_test(enable = "avx512f,avx512vl")]
49877 unsafe fn test_mm256_maskz_load_pd() {
49878 #[repr(align(32))]
49879 struct Align {
49880 data: [f64; 4], // 32 bytes
49881 }
49882 let a = Align {
49883 data: [1.0_f64, 2.0, 3.0, 4.0],
49884 };
49885 let p = a.data.as_ptr();
49886 let m = 0b1010;
49887 let r = _mm256_maskz_load_pd(m, black_box(p));
49888 let e = _mm256_setr_pd(0.0, 2.0, 0.0, 4.0);
49889 assert_eq_m256d(r, e);
49890 }
49891
49892 #[simd_test(enable = "avx512f,avx512vl")]
49893 unsafe fn test_mm256_mask_storeu_pd() {
49894 let mut r = [42_f64; 4];
49895 let a = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);
49896 let m = 0b1010;
49897 _mm256_mask_storeu_pd(r.as_mut_ptr(), m, a);
49898 let e = _mm256_setr_pd(42.0, 2.0, 42.0, 4.0);
49899 assert_eq_m256d(_mm256_loadu_pd(r.as_ptr()), e);
49900 }
49901
49902 #[simd_test(enable = "avx512f,avx512vl")]
49903 unsafe fn test_mm256_mask_store_pd() {
49904 #[repr(align(32))]
49905 struct Align {
49906 data: [f64; 4],
49907 }
49908 let mut r = Align { data: [42.0; 4] };
49909 let a = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);
49910 let m = 0b1010;
49911 _mm256_mask_store_pd(r.data.as_mut_ptr(), m, a);
49912 let e = _mm256_setr_pd(42.0, 2.0, 42.0, 4.0);
49913 assert_eq_m256d(_mm256_load_pd(r.data.as_ptr()), e);
49914 }
49915
49916 #[simd_test(enable = "avx512f,avx512vl")]
49917 unsafe fn test_mm_mask_loadu_epi32() {
49918 let src = _mm_set1_epi32(42);
49919 let a = &[1_i32, 2, 3, 4];
49920 let p = a.as_ptr();
49921 let m = 0b1010;
49922 let r = _mm_mask_loadu_epi32(src, m, black_box(p));
49923 let e = _mm_setr_epi32(42, 2, 42, 4);
49924 assert_eq_m128i(r, e);
49925 }
49926
49927 #[simd_test(enable = "avx512f,avx512vl")]
49928 unsafe fn test_mm_maskz_loadu_epi32() {
49929 let a = &[1_i32, 2, 3, 4];
49930 let p = a.as_ptr();
49931 let m = 0b1010;
49932 let r = _mm_maskz_loadu_epi32(m, black_box(p));
49933 let e = _mm_setr_epi32(0, 2, 0, 4);
49934 assert_eq_m128i(r, e);
49935 }
49936
49937 #[simd_test(enable = "avx512f,avx512vl")]
49938 unsafe fn test_mm_mask_load_epi32() {
49939 #[repr(align(16))]
49940 struct Align {
49941 data: [i32; 4], // 16 bytes
49942 }
49943 let src = _mm_set1_epi32(42);
49944 let a = Align {
49945 data: [1_i32, 2, 3, 4],
49946 };
49947 let p = a.data.as_ptr();
49948 let m = 0b1010;
49949 let r = _mm_mask_load_epi32(src, m, black_box(p));
49950 let e = _mm_setr_epi32(42, 2, 42, 4);
49951 assert_eq_m128i(r, e);
49952 }
49953
49954 #[simd_test(enable = "avx512f,avx512vl")]
49955 unsafe fn test_mm_maskz_load_epi32() {
49956 #[repr(align(16))]
49957 struct Align {
49958 data: [i32; 4], // 16 bytes
49959 }
49960 let a = Align {
49961 data: [1_i32, 2, 3, 4],
49962 };
49963 let p = a.data.as_ptr();
49964 let m = 0b1010;
49965 let r = _mm_maskz_load_epi32(m, black_box(p));
49966 let e = _mm_setr_epi32(0, 2, 0, 4);
49967 assert_eq_m128i(r, e);
49968 }
49969
49970 #[simd_test(enable = "avx512f,avx512vl")]
49971 unsafe fn test_mm_mask_storeu_epi32() {
49972 let mut r = [42_i32; 4];
49973 let a = _mm_setr_epi32(1, 2, 3, 4);
49974 let m = 0b1010;
49975 _mm_mask_storeu_epi32(r.as_mut_ptr(), m, a);
49976 let e = _mm_setr_epi32(42, 2, 42, 4);
49977 assert_eq_m128i(_mm_loadu_epi32(r.as_ptr()), e);
49978 }
49979
49980 #[simd_test(enable = "avx512f,avx512vl")]
49981 unsafe fn test_mm_mask_store_epi32() {
49982 #[repr(align(16))]
49983 struct Align {
49984 data: [i32; 4], // 16 bytes
49985 }
49986 let mut r = Align { data: [42; 4] };
49987 let a = _mm_setr_epi32(1, 2, 3, 4);
49988 let m = 0b1010;
49989 _mm_mask_store_epi32(r.data.as_mut_ptr(), m, a);
49990 let e = _mm_setr_epi32(42, 2, 42, 4);
49991 assert_eq_m128i(_mm_load_epi32(r.data.as_ptr()), e);
49992 }
49993
49994 #[simd_test(enable = "avx512f,avx512vl")]
49995 unsafe fn test_mm_mask_loadu_epi64() {
49996 let src = _mm_set1_epi64x(42);
49997 let a = &[1_i64, 2];
49998 let p = a.as_ptr();
49999 let m = 0b10;
50000 let r = _mm_mask_loadu_epi64(src, m, black_box(p));
50001 let e = _mm_setr_epi64x(42, 2);
50002 assert_eq_m128i(r, e);
50003 }
50004
50005 #[simd_test(enable = "avx512f,avx512vl")]
50006 unsafe fn test_mm_maskz_loadu_epi64() {
50007 let a = &[1_i64, 2];
50008 let p = a.as_ptr();
50009 let m = 0b10;
50010 let r = _mm_maskz_loadu_epi64(m, black_box(p));
50011 let e = _mm_setr_epi64x(0, 2);
50012 assert_eq_m128i(r, e);
50013 }
50014
50015 #[simd_test(enable = "avx512f,avx512vl")]
50016 unsafe fn test_mm_mask_load_epi64() {
50017 #[repr(align(16))]
50018 struct Align {
50019 data: [i64; 2], // 16 bytes
50020 }
50021 let src = _mm_set1_epi64x(42);
50022 let a = Align { data: [1_i64, 2] };
50023 let p = a.data.as_ptr();
50024 let m = 0b10;
50025 let r = _mm_mask_load_epi64(src, m, black_box(p));
50026 let e = _mm_setr_epi64x(42, 2);
50027 assert_eq_m128i(r, e);
50028 }
50029
50030 #[simd_test(enable = "avx512f,avx512vl")]
50031 unsafe fn test_mm_maskz_load_epi64() {
50032 #[repr(align(16))]
50033 struct Align {
50034 data: [i64; 2], // 16 bytes
50035 }
50036 let a = Align { data: [1_i64, 2] };
50037 let p = a.data.as_ptr();
50038 let m = 0b10;
50039 let r = _mm_maskz_load_epi64(m, black_box(p));
50040 let e = _mm_setr_epi64x(0, 2);
50041 assert_eq_m128i(r, e);
50042 }
50043
50044 #[simd_test(enable = "avx512f,avx512vl")]
50045 unsafe fn test_mm_mask_storeu_epi64() {
50046 let mut r = [42_i64; 2];
50047 let a = _mm_setr_epi64x(1, 2);
50048 let m = 0b10;
50049 _mm_mask_storeu_epi64(r.as_mut_ptr(), m, a);
50050 let e = _mm_setr_epi64x(42, 2);
50051 assert_eq_m128i(_mm_loadu_epi64(r.as_ptr()), e);
50052 }
50053
50054 #[simd_test(enable = "avx512f,avx512vl")]
50055 unsafe fn test_mm_mask_store_epi64() {
50056 #[repr(align(16))]
50057 struct Align {
50058 data: [i64; 2], // 16 bytes
50059 }
50060 let mut r = Align { data: [42; 2] };
50061 let a = _mm_setr_epi64x(1, 2);
50062 let m = 0b10;
50063 _mm_mask_store_epi64(r.data.as_mut_ptr(), m, a);
50064 let e = _mm_setr_epi64x(42, 2);
50065 assert_eq_m128i(_mm_load_epi64(r.data.as_ptr()), e);
50066 }
50067
50068 #[simd_test(enable = "avx512f,avx512vl")]
50069 unsafe fn test_mm_mask_loadu_ps() {
50070 let src = _mm_set1_ps(42.0);
50071 let a = &[1.0_f32, 2.0, 3.0, 4.0];
50072 let p = a.as_ptr();
50073 let m = 0b1010;
50074 let r = _mm_mask_loadu_ps(src, m, black_box(p));
50075 let e = _mm_setr_ps(42.0, 2.0, 42.0, 4.0);
50076 assert_eq_m128(r, e);
50077 }
50078
50079 #[simd_test(enable = "avx512f,avx512vl")]
50080 unsafe fn test_mm_maskz_loadu_ps() {
50081 let a = &[1.0_f32, 2.0, 3.0, 4.0];
50082 let p = a.as_ptr();
50083 let m = 0b1010;
50084 let r = _mm_maskz_loadu_ps(m, black_box(p));
50085 let e = _mm_setr_ps(0.0, 2.0, 0.0, 4.0);
50086 assert_eq_m128(r, e);
50087 }
50088
50089 #[simd_test(enable = "avx512f,avx512vl")]
50090 unsafe fn test_mm_mask_load_ps() {
50091 #[repr(align(16))]
50092 struct Align {
50093 data: [f32; 4], // 16 bytes
50094 }
50095 let src = _mm_set1_ps(42.0);
50096 let a = Align {
50097 data: [1.0_f32, 2.0, 3.0, 4.0],
50098 };
50099 let p = a.data.as_ptr();
50100 let m = 0b1010;
50101 let r = _mm_mask_load_ps(src, m, black_box(p));
50102 let e = _mm_setr_ps(42.0, 2.0, 42.0, 4.0);
50103 assert_eq_m128(r, e);
50104 }
50105
50106 #[simd_test(enable = "avx512f,avx512vl")]
50107 unsafe fn test_mm_maskz_load_ps() {
50108 #[repr(align(16))]
50109 struct Align {
50110 data: [f32; 4], // 16 bytes
50111 }
50112 let a = Align {
50113 data: [1.0_f32, 2.0, 3.0, 4.0],
50114 };
50115 let p = a.data.as_ptr();
50116 let m = 0b1010;
50117 let r = _mm_maskz_load_ps(m, black_box(p));
50118 let e = _mm_setr_ps(0.0, 2.0, 0.0, 4.0);
50119 assert_eq_m128(r, e);
50120 }
50121
50122 #[simd_test(enable = "avx512f,avx512vl")]
50123 unsafe fn test_mm_mask_storeu_ps() {
50124 let mut r = [42_f32; 4];
50125 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
50126 let m = 0b1010;
50127 _mm_mask_storeu_ps(r.as_mut_ptr(), m, a);
50128 let e = _mm_setr_ps(42.0, 2.0, 42.0, 4.0);
50129 assert_eq_m128(_mm_loadu_ps(r.as_ptr()), e);
50130 }
50131
50132 #[simd_test(enable = "avx512f,avx512vl")]
50133 unsafe fn test_mm_mask_store_ps() {
50134 #[repr(align(16))]
50135 struct Align {
50136 data: [f32; 4], // 16 bytes
50137 }
50138 let mut r = Align { data: [42.0; 4] };
50139 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
50140 let m = 0b1010;
50141 _mm_mask_store_ps(r.data.as_mut_ptr(), m, a);
50142 let e = _mm_setr_ps(42.0, 2.0, 42.0, 4.0);
50143 assert_eq_m128(_mm_load_ps(r.data.as_ptr()), e);
50144 }
50145
50146 #[simd_test(enable = "avx512f,avx512vl")]
50147 unsafe fn test_mm_mask_loadu_pd() {
50148 let src = _mm_set1_pd(42.0);
50149 let a = &[1.0_f64, 2.0];
50150 let p = a.as_ptr();
50151 let m = 0b10;
50152 let r = _mm_mask_loadu_pd(src, m, black_box(p));
50153 let e = _mm_setr_pd(42.0, 2.0);
50154 assert_eq_m128d(r, e);
50155 }
50156
50157 #[simd_test(enable = "avx512f,avx512vl")]
50158 unsafe fn test_mm_maskz_loadu_pd() {
50159 let a = &[1.0_f64, 2.0];
50160 let p = a.as_ptr();
50161 let m = 0b10;
50162 let r = _mm_maskz_loadu_pd(m, black_box(p));
50163 let e = _mm_setr_pd(0.0, 2.0);
50164 assert_eq_m128d(r, e);
50165 }
50166
50167 #[simd_test(enable = "avx512f,avx512vl")]
50168 unsafe fn test_mm_mask_load_pd() {
50169 #[repr(align(16))]
50170 struct Align {
50171 data: [f64; 2], // 16 bytes
50172 }
50173 let src = _mm_set1_pd(42.0);
50174 let a = Align {
50175 data: [1.0_f64, 2.0],
50176 };
50177 let p = a.data.as_ptr();
50178 let m = 0b10;
50179 let r = _mm_mask_load_pd(src, m, black_box(p));
50180 let e = _mm_setr_pd(42.0, 2.0);
50181 assert_eq_m128d(r, e);
50182 }
50183
50184 #[simd_test(enable = "avx512f,avx512vl")]
50185 unsafe fn test_mm_maskz_load_pd() {
50186 #[repr(align(16))]
50187 struct Align {
50188 data: [f64; 2], // 16 bytes
50189 }
50190 let a = Align {
50191 data: [1.0_f64, 2.0],
50192 };
50193 let p = a.data.as_ptr();
50194 let m = 0b10;
50195 let r = _mm_maskz_load_pd(m, black_box(p));
50196 let e = _mm_setr_pd(0.0, 2.0);
50197 assert_eq_m128d(r, e);
50198 }
50199
50200 #[simd_test(enable = "avx512f,avx512vl")]
50201 unsafe fn test_mm_mask_storeu_pd() {
50202 let mut r = [42_f64; 2];
50203 let a = _mm_setr_pd(1.0, 2.0);
50204 let m = 0b10;
50205 _mm_mask_storeu_pd(r.as_mut_ptr(), m, a);
50206 let e = _mm_setr_pd(42.0, 2.0);
50207 assert_eq_m128d(_mm_loadu_pd(r.as_ptr()), e);
50208 }
50209
50210 #[simd_test(enable = "avx512f,avx512vl")]
50211 unsafe fn test_mm_mask_store_pd() {
50212 #[repr(align(16))]
50213 struct Align {
50214 data: [f64; 2], // 16 bytes
50215 }
50216 let mut r = Align { data: [42.0; 2] };
50217 let a = _mm_setr_pd(1.0, 2.0);
50218 let m = 0b10;
50219 _mm_mask_store_pd(r.data.as_mut_ptr(), m, a);
50220 let e = _mm_setr_pd(42.0, 2.0);
50221 assert_eq_m128d(_mm_load_pd(r.data.as_ptr()), e);
50222 }
50223
50224 #[simd_test(enable = "avx512f")]
50225 unsafe fn test_mm512_setr_pd() {
50226 let r = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.);
50227 assert_eq_m512d(r, _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.));
50228 }
50229
50230 #[simd_test(enable = "avx512f")]
50231 unsafe fn test_mm512_set_pd() {
50232 let r = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
50233 assert_eq_m512d(r, _mm512_set_pd(7., 6., 5., 4., 3., 2., 1., 0.));
50234 }
50235
50236 #[simd_test(enable = "avx512f")]
50237 unsafe fn test_mm512_rol_epi32() {
50238 let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
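// Rotating left by one wraps bit 31 of the first lane around to bit 0; every other lane doubles from 1 to 2.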
50239 let r = _mm512_rol_epi32::<1>(a);
50240 let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
50241 assert_eq_m512i(r, e);
50242 }
50243
50244 #[simd_test(enable = "avx512f")]
50245 unsafe fn test_mm512_mask_rol_epi32() {
50246 let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
50247 let r = _mm512_mask_rol_epi32::<1>(a, 0, a);
50248 assert_eq_m512i(r, a);
50249 let r = _mm512_mask_rol_epi32::<1>(a, 0b11111111_11111111, a);
50250 let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
50251 assert_eq_m512i(r, e);
50252 }
50253
50254 #[simd_test(enable = "avx512f")]
50255 unsafe fn test_mm512_maskz_rol_epi32() {
50256 let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
50257 let r = _mm512_maskz_rol_epi32::<1>(0, a);
50258 assert_eq_m512i(r, _mm512_setzero_si512());
50259 let r = _mm512_maskz_rol_epi32::<1>(0b00000000_11111111, a);
50260 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
50261 assert_eq_m512i(r, e);
50262 }
50263
50264 #[simd_test(enable = "avx512f,avx512vl")]
50265 unsafe fn test_mm256_rol_epi32() {
50266 let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
50267 let r = _mm256_rol_epi32::<1>(a);
50268 let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
50269 assert_eq_m256i(r, e);
50270 }
50271
50272 #[simd_test(enable = "avx512f,avx512vl")]
50273 unsafe fn test_mm256_mask_rol_epi32() {
50274 let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
50275 let r = _mm256_mask_rol_epi32::<1>(a, 0, a);
50276 assert_eq_m256i(r, a);
50277 let r = _mm256_mask_rol_epi32::<1>(a, 0b11111111, a);
50278 let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
50279 assert_eq_m256i(r, e);
50280 }
50281
50282 #[simd_test(enable = "avx512f,avx512vl")]
50283 unsafe fn test_mm256_maskz_rol_epi32() {
50284 let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
50285 let r = _mm256_maskz_rol_epi32::<1>(0, a);
50286 assert_eq_m256i(r, _mm256_setzero_si256());
50287 let r = _mm256_maskz_rol_epi32::<1>(0b11111111, a);
50288 let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
50289 assert_eq_m256i(r, e);
50290 }
50291
50292 #[simd_test(enable = "avx512f,avx512vl")]
50293 unsafe fn test_mm_rol_epi32() {
50294 let a = _mm_set_epi32(1 << 31, 1, 1, 1);
50295 let r = _mm_rol_epi32::<1>(a);
50296 let e = _mm_set_epi32(1 << 0, 2, 2, 2);
50297 assert_eq_m128i(r, e);
50298 }
50299
50300 #[simd_test(enable = "avx512f,avx512vl")]
50301 unsafe fn test_mm_mask_rol_epi32() {
50302 let a = _mm_set_epi32(1 << 31, 1, 1, 1);
50303 let r = _mm_mask_rol_epi32::<1>(a, 0, a);
50304 assert_eq_m128i(r, a);
50305 let r = _mm_mask_rol_epi32::<1>(a, 0b00001111, a);
50306 let e = _mm_set_epi32(1 << 0, 2, 2, 2);
50307 assert_eq_m128i(r, e);
50308 }
50309
50310 #[simd_test(enable = "avx512f,avx512vl")]
50311 unsafe fn test_mm_maskz_rol_epi32() {
50312 let a = _mm_set_epi32(1 << 31, 1, 1, 1);
50313 let r = _mm_maskz_rol_epi32::<1>(0, a);
50314 assert_eq_m128i(r, _mm_setzero_si128());
50315 let r = _mm_maskz_rol_epi32::<1>(0b00001111, a);
50316 let e = _mm_set_epi32(1 << 0, 2, 2, 2);
50317 assert_eq_m128i(r, e);
50318 }
50319
50320 #[simd_test(enable = "avx512f")]
50321 unsafe fn test_mm512_ror_epi32() {
50322 let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
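// Rotating right by one wraps bit 0 of the first lane around to bit 31; the lanes holding 2 halve back to 1.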
50323 let r = _mm512_ror_epi32::<1>(a);
50324 let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
50325 assert_eq_m512i(r, e);
50326 }
50327
50328 #[simd_test(enable = "avx512f")]
50329 unsafe fn test_mm512_mask_ror_epi32() {
50330 let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
50331 let r = _mm512_mask_ror_epi32::<1>(a, 0, a);
50332 assert_eq_m512i(r, a);
50333 let r = _mm512_mask_ror_epi32::<1>(a, 0b11111111_11111111, a);
50334 let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
50335 assert_eq_m512i(r, e);
50336 }
50337
50338 #[simd_test(enable = "avx512f")]
50339 unsafe fn test_mm512_maskz_ror_epi32() {
50340 let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
50341 let r = _mm512_maskz_ror_epi32::<1>(0, a);
50342 assert_eq_m512i(r, _mm512_setzero_si512());
50343 let r = _mm512_maskz_ror_epi32::<1>(0b00000000_11111111, a);
50344 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
50345 assert_eq_m512i(r, e);
50346 }
50347
50348 #[simd_test(enable = "avx512f,avx512vl")]
50349 unsafe fn test_mm256_ror_epi32() {
50350 let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
50351 let r = _mm256_ror_epi32::<1>(a);
50352 let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
50353 assert_eq_m256i(r, e);
50354 }
50355
50356 #[simd_test(enable = "avx512f,avx512vl")]
50357 unsafe fn test_mm256_mask_ror_epi32() {
50358 let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
50359 let r = _mm256_mask_ror_epi32::<1>(a, 0, a);
50360 assert_eq_m256i(r, a);
50361 let r = _mm256_mask_ror_epi32::<1>(a, 0b11111111, a);
50362 let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
50363 assert_eq_m256i(r, e);
50364 }
50365
50366 #[simd_test(enable = "avx512f,avx512vl")]
50367 unsafe fn test_mm256_maskz_ror_epi32() {
50368 let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
50369 let r = _mm256_maskz_ror_epi32::<1>(0, a);
50370 assert_eq_m256i(r, _mm256_setzero_si256());
50371 let r = _mm256_maskz_ror_epi32::<1>(0b11111111, a);
50372 let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
50373 assert_eq_m256i(r, e);
50374 }
50375
50376 #[simd_test(enable = "avx512f,avx512vl")]
50377 unsafe fn test_mm_ror_epi32() {
50378 let a = _mm_set_epi32(1 << 0, 2, 2, 2);
50379 let r = _mm_ror_epi32::<1>(a);
50380 let e = _mm_set_epi32(1 << 31, 1, 1, 1);
50381 assert_eq_m128i(r, e);
50382 }
50383
50384 #[simd_test(enable = "avx512f,avx512vl")]
50385 unsafe fn test_mm_mask_ror_epi32() {
50386 let a = _mm_set_epi32(1 << 0, 2, 2, 2);
50387 let r = _mm_mask_ror_epi32::<1>(a, 0, a);
50388 assert_eq_m128i(r, a);
50389 let r = _mm_mask_ror_epi32::<1>(a, 0b00001111, a);
50390 let e = _mm_set_epi32(1 << 31, 1, 1, 1);
50391 assert_eq_m128i(r, e);
50392 }
50393
50394 #[simd_test(enable = "avx512f,avx512vl")]
50395 unsafe fn test_mm_maskz_ror_epi32() {
50396 let a = _mm_set_epi32(1 << 0, 2, 2, 2);
50397 let r = _mm_maskz_ror_epi32::<1>(0, a);
50398 assert_eq_m128i(r, _mm_setzero_si128());
50399 let r = _mm_maskz_ror_epi32::<1>(0b00001111, a);
50400 let e = _mm_set_epi32(1 << 31, 1, 1, 1);
50401 assert_eq_m128i(r, e);
50402 }
50403
50404 #[simd_test(enable = "avx512f")]
50405 unsafe fn test_mm512_slli_epi32() {
50406 let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
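// A plain logical shift, unlike rol, discards bit 31, so the first lane becomes 0 instead of wrapping around.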
50407 let r = _mm512_slli_epi32::<1>(a);
50408 let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
50409 assert_eq_m512i(r, e);
50410 }
50411
50412 #[simd_test(enable = "avx512f")]
50413 unsafe fn test_mm512_mask_slli_epi32() {
50414 let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
50415 let r = _mm512_mask_slli_epi32::<1>(a, 0, a);
50416 assert_eq_m512i(r, a);
50417 let r = _mm512_mask_slli_epi32::<1>(a, 0b11111111_11111111, a);
50418 let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
50419 assert_eq_m512i(r, e);
50420 }
50421
50422 #[simd_test(enable = "avx512f")]
50423 unsafe fn test_mm512_maskz_slli_epi32() {
50424 let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
50425 let r = _mm512_maskz_slli_epi32::<1>(0, a);
50426 assert_eq_m512i(r, _mm512_setzero_si512());
50427 let r = _mm512_maskz_slli_epi32::<1>(0b00000000_11111111, a);
50428 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0);
50429 assert_eq_m512i(r, e);
50430 }
50431
50432 #[simd_test(enable = "avx512f,avx512vl")]
50433 unsafe fn test_mm256_mask_slli_epi32() {
50434 let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
50435 let r = _mm256_mask_slli_epi32::<1>(a, 0, a);
50436 assert_eq_m256i(r, a);
50437 let r = _mm256_mask_slli_epi32::<1>(a, 0b11111111, a);
50438 let e = _mm256_set_epi32(0, 2, 2, 2, 2, 2, 2, 2);
50439 assert_eq_m256i(r, e);
50440 }
50441
50442 #[simd_test(enable = "avx512f,avx512vl")]
50443 unsafe fn test_mm256_maskz_slli_epi32() {
50444 let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
50445 let r = _mm256_maskz_slli_epi32::<1>(0, a);
50446 assert_eq_m256i(r, _mm256_setzero_si256());
50447 let r = _mm256_maskz_slli_epi32::<1>(0b11111111, a);
50448 let e = _mm256_set_epi32(0, 2, 2, 2, 2, 2, 2, 2);
50449 assert_eq_m256i(r, e);
50450 }
50451
50452 #[simd_test(enable = "avx512f,avx512vl")]
50453 unsafe fn test_mm_mask_slli_epi32() {
50454 let a = _mm_set_epi32(1 << 31, 1, 1, 1);
50455 let r = _mm_mask_slli_epi32::<1>(a, 0, a);
50456 assert_eq_m128i(r, a);
50457 let r = _mm_mask_slli_epi32::<1>(a, 0b00001111, a);
50458 let e = _mm_set_epi32(0, 2, 2, 2);
50459 assert_eq_m128i(r, e);
50460 }
50461
50462 #[simd_test(enable = "avx512f,avx512vl")]
50463 unsafe fn test_mm_maskz_slli_epi32() {
50464 let a = _mm_set_epi32(1 << 31, 1, 1, 1);
50465 let r = _mm_maskz_slli_epi32::<1>(0, a);
50466 assert_eq_m128i(r, _mm_setzero_si128());
50467 let r = _mm_maskz_slli_epi32::<1>(0b00001111, a);
50468 let e = _mm_set_epi32(0, 2, 2, 2);
50469 assert_eq_m128i(r, e);
50470 }
50471
50472 #[simd_test(enable = "avx512f")]
50473 unsafe fn test_mm512_srli_epi32() {
50474 let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
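// Logical right shift by one halves each lane; the 0 << 31 in the expected value is simply 0, written that way to mirror the rotate tests.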
50475 let r = _mm512_srli_epi32::<1>(a);
50476 let e = _mm512_set_epi32(0 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
50477 assert_eq_m512i(r, e);
50478 }
50479
50480 #[simd_test(enable = "avx512f")]
50481 unsafe fn test_mm512_mask_srli_epi32() {
50482 let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
50483 let r = _mm512_mask_srli_epi32::<1>(a, 0, a);
50484 assert_eq_m512i(r, a);
50485 let r = _mm512_mask_srli_epi32::<1>(a, 0b11111111_11111111, a);
50486 let e = _mm512_set_epi32(0 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
50487 assert_eq_m512i(r, e);
50488 }
50489
50490 #[simd_test(enable = "avx512f")]
50491 unsafe fn test_mm512_maskz_srli_epi32() {
50492 let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0);
50493 let r = _mm512_maskz_srli_epi32::<1>(0, a);
50494 assert_eq_m512i(r, _mm512_setzero_si512());
50495 let r = _mm512_maskz_srli_epi32::<1>(0b00000000_11111111, a);
50496 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0 << 31);
50497 assert_eq_m512i(r, e);
50498 }
50499
50500 #[simd_test(enable = "avx512f,avx512vl")]
50501 unsafe fn test_mm256_mask_srli_epi32() {
50502 let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
50503 let r = _mm256_mask_srli_epi32::<1>(a, 0, a);
50504 assert_eq_m256i(r, a);
50505 let r = _mm256_mask_srli_epi32::<1>(a, 0b11111111, a);
50506 let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
50507 assert_eq_m256i(r, e);
50508 }
50509
50510 #[simd_test(enable = "avx512f,avx512vl")]
50511 unsafe fn test_mm256_maskz_srli_epi32() {
50512 let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
50513 let r = _mm256_maskz_srli_epi32::<1>(0, a);
50514 assert_eq_m256i(r, _mm256_setzero_si256());
50515 let r = _mm256_maskz_srli_epi32::<1>(0b11111111, a);
50516 let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
50517 assert_eq_m256i(r, e);
50518 }
50519
50520 #[simd_test(enable = "avx512f,avx512vl")]
50521 unsafe fn test_mm_mask_srli_epi32() {
50522 let a = _mm_set_epi32(1 << 5, 0, 0, 0);
50523 let r = _mm_mask_srli_epi32::<1>(a, 0, a);
50524 assert_eq_m128i(r, a);
50525 let r = _mm_mask_srli_epi32::<1>(a, 0b00001111, a);
50526 let e = _mm_set_epi32(1 << 4, 0, 0, 0);
50527 assert_eq_m128i(r, e);
50528 }
50529
50530 #[simd_test(enable = "avx512f,avx512vl")]
50531 unsafe fn test_mm_maskz_srli_epi32() {
50532 let a = _mm_set_epi32(1 << 5, 0, 0, 0);
50533 let r = _mm_maskz_srli_epi32::<1>(0, a);
50534 assert_eq_m128i(r, _mm_setzero_si128());
50535 let r = _mm_maskz_srli_epi32::<1>(0b00001111, a);
50536 let e = _mm_set_epi32(1 << 4, 0, 0, 0);
50537 assert_eq_m128i(r, e);
50538 }
50539
50540 #[simd_test(enable = "avx512f")]
50541 unsafe fn test_mm512_rolv_epi32() {
50542 let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
50543 let b = _mm512_set1_epi32(1);
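// rolv takes a per-lane rotate count from b; with every count equal to 1 it matches _mm512_rol_epi32::<1>.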
50544 let r = _mm512_rolv_epi32(a, b);
50545 let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
50546 assert_eq_m512i(r, e);
50547 }
50548
50549 #[simd_test(enable = "avx512f")]
50550 unsafe fn test_mm512_mask_rolv_epi32() {
50551 let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
50552 let b = _mm512_set1_epi32(1);
50553 let r = _mm512_mask_rolv_epi32(a, 0, a, b);
50554 assert_eq_m512i(r, a);
50555 let r = _mm512_mask_rolv_epi32(a, 0b11111111_11111111, a, b);
50556 let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
50557 assert_eq_m512i(r, e);
50558 }
50559
50560 #[simd_test(enable = "avx512f")]
50561 unsafe fn test_mm512_maskz_rolv_epi32() {
50562 let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
50563 let b = _mm512_set1_epi32(1);
50564 let r = _mm512_maskz_rolv_epi32(0, a, b);
50565 assert_eq_m512i(r, _mm512_setzero_si512());
50566 let r = _mm512_maskz_rolv_epi32(0b00000000_11111111, a, b);
50567 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
50568 assert_eq_m512i(r, e);
50569 }
50570
50571 #[simd_test(enable = "avx512f,avx512vl")]
50572 unsafe fn test_mm256_rolv_epi32() {
50573 let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
50574 let b = _mm256_set1_epi32(1);
50575 let r = _mm256_rolv_epi32(a, b);
50576 let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
50577 assert_eq_m256i(r, e);
50578 }
50579
50580 #[simd_test(enable = "avx512f,avx512vl")]
50581 unsafe fn test_mm256_mask_rolv_epi32() {
50582 let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
50583 let b = _mm256_set1_epi32(1);
50584 let r = _mm256_mask_rolv_epi32(a, 0, a, b);
50585 assert_eq_m256i(r, a);
50586 let r = _mm256_mask_rolv_epi32(a, 0b11111111, a, b);
50587 let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
50588 assert_eq_m256i(r, e);
50589 }
50590
50591 #[simd_test(enable = "avx512f,avx512vl")]
50592 unsafe fn test_mm256_maskz_rolv_epi32() {
50593 let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
50594 let b = _mm256_set1_epi32(1);
50595 let r = _mm256_maskz_rolv_epi32(0, a, b);
50596 assert_eq_m256i(r, _mm256_setzero_si256());
50597 let r = _mm256_maskz_rolv_epi32(0b11111111, a, b);
50598 let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
50599 assert_eq_m256i(r, e);
50600 }
50601
50602 #[simd_test(enable = "avx512f,avx512vl")]
50603 unsafe fn test_mm_rolv_epi32() {
50604 let a = _mm_set_epi32(1 << 31, 1, 1, 1);
50605 let b = _mm_set1_epi32(1);
50606 let r = _mm_rolv_epi32(a, b);
50607 let e = _mm_set_epi32(1 << 0, 2, 2, 2);
50608 assert_eq_m128i(r, e);
50609 }
50610
50611 #[simd_test(enable = "avx512f,avx512vl")]
50612 unsafe fn test_mm_mask_rolv_epi32() {
50613 let a = _mm_set_epi32(1 << 31, 1, 1, 1);
50614 let b = _mm_set1_epi32(1);
50615 let r = _mm_mask_rolv_epi32(a, 0, a, b);
50616 assert_eq_m128i(r, a);
50617 let r = _mm_mask_rolv_epi32(a, 0b00001111, a, b);
50618 let e = _mm_set_epi32(1 << 0, 2, 2, 2);
50619 assert_eq_m128i(r, e);
50620 }
50621
50622 #[simd_test(enable = "avx512f,avx512vl")]
50623 unsafe fn test_mm_maskz_rolv_epi32() {
50624 let a = _mm_set_epi32(1 << 31, 1, 1, 1);
50625 let b = _mm_set1_epi32(1);
50626 let r = _mm_maskz_rolv_epi32(0, a, b);
50627 assert_eq_m128i(r, _mm_setzero_si128());
50628 let r = _mm_maskz_rolv_epi32(0b00001111, a, b);
50629 let e = _mm_set_epi32(1 << 0, 2, 2, 2);
50630 assert_eq_m128i(r, e);
50631 }
50632
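    // rorv is the right-rotate counterpart: bit 0 wraps around to bit 31.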
50633 #[simd_test(enable = "avx512f")]
50634 unsafe fn test_mm512_rorv_epi32() {
50635 let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
50636 let b = _mm512_set1_epi32(1);
50637 let r = _mm512_rorv_epi32(a, b);
50638 let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
50639 assert_eq_m512i(r, e);
50640 }
50641
50642 #[simd_test(enable = "avx512f")]
50643 unsafe fn test_mm512_mask_rorv_epi32() {
50644 let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
50645 let b = _mm512_set1_epi32(1);
50646 let r = _mm512_mask_rorv_epi32(a, 0, a, b);
50647 assert_eq_m512i(r, a);
50648 let r = _mm512_mask_rorv_epi32(a, 0b11111111_11111111, a, b);
50649 let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
50650 assert_eq_m512i(r, e);
50651 }
50652
50653 #[simd_test(enable = "avx512f")]
50654 unsafe fn test_mm512_maskz_rorv_epi32() {
50655 let a = _mm512_set_epi32(3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
50656 let b = _mm512_set1_epi32(1);
50657 let r = _mm512_maskz_rorv_epi32(0, a, b);
50658 assert_eq_m512i(r, _mm512_setzero_si512());
50659 let r = _mm512_maskz_rorv_epi32(0b00000000_11111111, a, b);
50660 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
50661 assert_eq_m512i(r, e);
50662 }
50663
50664 #[simd_test(enable = "avx512f,avx512vl")]
50665 unsafe fn test_mm256_rorv_epi32() {
50666 let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
50667 let b = _mm256_set1_epi32(1);
50668 let r = _mm256_rorv_epi32(a, b);
50669 let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
50670 assert_eq_m256i(r, e);
50671 }
50672
50673 #[simd_test(enable = "avx512f,avx512vl")]
50674 unsafe fn test_mm256_mask_rorv_epi32() {
50675 let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
50676 let b = _mm256_set1_epi32(1);
50677 let r = _mm256_mask_rorv_epi32(a, 0, a, b);
50678 assert_eq_m256i(r, a);
50679 let r = _mm256_mask_rorv_epi32(a, 0b11111111, a, b);
50680 let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
50681 assert_eq_m256i(r, e);
50682 }
50683
50684 #[simd_test(enable = "avx512f,avx512vl")]
50685 unsafe fn test_mm256_maskz_rorv_epi32() {
50686 let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
50687 let b = _mm256_set1_epi32(1);
50688 let r = _mm256_maskz_rorv_epi32(0, a, b);
50689 assert_eq_m256i(r, _mm256_setzero_si256());
50690 let r = _mm256_maskz_rorv_epi32(0b11111111, a, b);
50691 let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
50692 assert_eq_m256i(r, e);
50693 }
50694
50695 #[simd_test(enable = "avx512f,avx512vl")]
50696 unsafe fn test_mm_rorv_epi32() {
50697 let a = _mm_set_epi32(1 << 0, 2, 2, 2);
50698 let b = _mm_set1_epi32(1);
50699 let r = _mm_rorv_epi32(a, b);
50700 let e = _mm_set_epi32(1 << 31, 1, 1, 1);
50701 assert_eq_m128i(r, e);
50702 }
50703
50704 #[simd_test(enable = "avx512f,avx512vl")]
50705 unsafe fn test_mm_mask_rorv_epi32() {
50706 let a = _mm_set_epi32(1 << 0, 2, 2, 2);
50707 let b = _mm_set1_epi32(1);
50708 let r = _mm_mask_rorv_epi32(a, 0, a, b);
50709 assert_eq_m128i(r, a);
50710 let r = _mm_mask_rorv_epi32(a, 0b00001111, a, b);
50711 let e = _mm_set_epi32(1 << 31, 1, 1, 1);
50712 assert_eq_m128i(r, e);
50713 }
50714
50715 #[simd_test(enable = "avx512f,avx512vl")]
50716 unsafe fn test_mm_maskz_rorv_epi32() {
50717 let a = _mm_set_epi32(1 << 0, 2, 2, 2);
50718 let b = _mm_set1_epi32(1);
50719 let r = _mm_maskz_rorv_epi32(0, a, b);
50720 assert_eq_m128i(r, _mm_setzero_si128());
50721 let r = _mm_maskz_rorv_epi32(0b00001111, a, b);
50722 let e = _mm_set_epi32(1 << 31, 1, 1, 1);
50723 assert_eq_m128i(r, e);
50724 }
50725
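    // sllv shifts (rather than rotates) each element left by its per-element count,
    // so the top bit is simply discarded: 1 << 31 shifted left by 1 becomes 0.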
50726 #[simd_test(enable = "avx512f")]
50727 unsafe fn test_mm512_sllv_epi32() {
50728 let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
50729 let count = _mm512_set1_epi32(1);
50730 let r = _mm512_sllv_epi32(a, count);
50731 let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
50732 assert_eq_m512i(r, e);
50733 }
50734
50735 #[simd_test(enable = "avx512f")]
50736 unsafe fn test_mm512_mask_sllv_epi32() {
50737 let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
50738 let count = _mm512_set1_epi32(1);
50739 let r = _mm512_mask_sllv_epi32(a, 0, a, count);
50740 assert_eq_m512i(r, a);
50741 let r = _mm512_mask_sllv_epi32(a, 0b11111111_11111111, a, count);
50742 let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
50743 assert_eq_m512i(r, e);
50744 }
50745
50746 #[simd_test(enable = "avx512f")]
50747 unsafe fn test_mm512_maskz_sllv_epi32() {
50748 let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
50749 let count = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
50750 let r = _mm512_maskz_sllv_epi32(0, a, count);
50751 assert_eq_m512i(r, _mm512_setzero_si512());
50752 let r = _mm512_maskz_sllv_epi32(0b00000000_11111111, a, count);
50753 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0);
50754 assert_eq_m512i(r, e);
50755 }
50756
50757 #[simd_test(enable = "avx512f,avx512vl")]
50758 unsafe fn test_mm256_mask_sllv_epi32() {
50759 let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
50760 let count = _mm256_set1_epi32(1);
50761 let r = _mm256_mask_sllv_epi32(a, 0, a, count);
50762 assert_eq_m256i(r, a);
50763 let r = _mm256_mask_sllv_epi32(a, 0b11111111, a, count);
50764 let e = _mm256_set_epi32(0, 2, 2, 2, 2, 2, 2, 2);
50765 assert_eq_m256i(r, e);
50766 }
50767
50768 #[simd_test(enable = "avx512f,avx512vl")]
50769 unsafe fn test_mm256_maskz_sllv_epi32() {
50770 let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
50771 let count = _mm256_set1_epi32(1);
50772 let r = _mm256_maskz_sllv_epi32(0, a, count);
50773 assert_eq_m256i(r, _mm256_setzero_si256());
50774 let r = _mm256_maskz_sllv_epi32(0b11111111, a, count);
50775 let e = _mm256_set_epi32(0, 2, 2, 2, 2, 2, 2, 2);
50776 assert_eq_m256i(r, e);
50777 }
50778
50779 #[simd_test(enable = "avx512f,avx512vl")]
50780 unsafe fn test_mm_mask_sllv_epi32() {
50781 let a = _mm_set_epi32(1 << 31, 1, 1, 1);
50782 let count = _mm_set1_epi32(1);
50783 let r = _mm_mask_sllv_epi32(a, 0, a, count);
50784 assert_eq_m128i(r, a);
50785 let r = _mm_mask_sllv_epi32(a, 0b00001111, a, count);
50786 let e = _mm_set_epi32(0, 2, 2, 2);
50787 assert_eq_m128i(r, e);
50788 }
50789
50790 #[simd_test(enable = "avx512f,avx512vl")]
50791 unsafe fn test_mm_maskz_sllv_epi32() {
50792 let a = _mm_set_epi32(1 << 31, 1, 1, 1);
50793 let count = _mm_set1_epi32(1);
50794 let r = _mm_maskz_sllv_epi32(0, a, count);
50795 assert_eq_m128i(r, _mm_setzero_si128());
50796 let r = _mm_maskz_sllv_epi32(0b00001111, a, count);
50797 let e = _mm_set_epi32(0, 2, 2, 2);
50798 assert_eq_m128i(r, e);
50799 }
50800
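    // srlv: per-element logical right shift with zero fill.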
50801 #[simd_test(enable = "avx512f")]
50802 unsafe fn test_mm512_srlv_epi32() {
50803 let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
50804 let count = _mm512_set1_epi32(1);
50805 let r = _mm512_srlv_epi32(a, count);
50806 let e = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
50807 assert_eq_m512i(r, e);
50808 }
50809
50810 #[simd_test(enable = "avx512f")]
50811 unsafe fn test_mm512_mask_srlv_epi32() {
50812 let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
50813 let count = _mm512_set1_epi32(1);
50814 let r = _mm512_mask_srlv_epi32(a, 0, a, count);
50815 assert_eq_m512i(r, a);
50816 let r = _mm512_mask_srlv_epi32(a, 0b11111111_11111111, a, count);
50817 let e = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
50818 assert_eq_m512i(r, e);
50819 }
50820
50821 #[simd_test(enable = "avx512f")]
50822 unsafe fn test_mm512_maskz_srlv_epi32() {
50823 let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0);
50824 let count = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
50825 let r = _mm512_maskz_srlv_epi32(0, a, count);
50826 assert_eq_m512i(r, _mm512_setzero_si512());
50827 let r = _mm512_maskz_srlv_epi32(0b00000000_11111111, a, count);
50828 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0);
50829 assert_eq_m512i(r, e);
50830 }
50831
50832 #[simd_test(enable = "avx512f,avx512vl")]
50833 unsafe fn test_mm256_mask_srlv_epi32() {
50834 let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
50835 let count = _mm256_set1_epi32(1);
50836 let r = _mm256_mask_srlv_epi32(a, 0, a, count);
50837 assert_eq_m256i(r, a);
50838 let r = _mm256_mask_srlv_epi32(a, 0b11111111, a, count);
50839 let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
50840 assert_eq_m256i(r, e);
50841 }
50842
50843 #[simd_test(enable = "avx512f,avx512vl")]
50844 unsafe fn test_mm256_maskz_srlv_epi32() {
50845 let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
50846 let count = _mm256_set1_epi32(1);
50847 let r = _mm256_maskz_srlv_epi32(0, a, count);
50848 assert_eq_m256i(r, _mm256_setzero_si256());
50849 let r = _mm256_maskz_srlv_epi32(0b11111111, a, count);
50850 let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
50851 assert_eq_m256i(r, e);
50852 }
50853
50854 #[simd_test(enable = "avx512f,avx512vl")]
50855 unsafe fn test_mm_mask_srlv_epi32() {
50856 let a = _mm_set_epi32(1 << 5, 0, 0, 0);
50857 let count = _mm_set1_epi32(1);
50858 let r = _mm_mask_srlv_epi32(a, 0, a, count);
50859 assert_eq_m128i(r, a);
50860 let r = _mm_mask_srlv_epi32(a, 0b00001111, a, count);
50861 let e = _mm_set_epi32(1 << 4, 0, 0, 0);
50862 assert_eq_m128i(r, e);
50863 }
50864
50865 #[simd_test(enable = "avx512f,avx512vl")]
50866 unsafe fn test_mm_maskz_srlv_epi32() {
50867 let a = _mm_set_epi32(1 << 5, 0, 0, 0);
50868 let count = _mm_set1_epi32(1);
50869 let r = _mm_maskz_srlv_epi32(0, a, count);
50870 assert_eq_m128i(r, _mm_setzero_si128());
50871 let r = _mm_maskz_srlv_epi32(0b00001111, a, count);
50872 let e = _mm_set_epi32(1 << 4, 0, 0, 0);
50873 assert_eq_m128i(r, e);
50874 }
50875
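    // For _mm512_sll_epi32 the count operand is a __m128i: a single shift amount is taken
    // from its low 64 bits and applied to every element, so only elements 1 and 0 of
    // `count` matter in these tests.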
50876 #[simd_test(enable = "avx512f")]
50877 unsafe fn test_mm512_sll_epi32() {
50878 #[rustfmt::skip]
50879 let a = _mm512_set_epi32(
50880 1 << 31, 1 << 0, 1 << 1, 1 << 2,
50881 0, 0, 0, 0,
50882 0, 0, 0, 0,
50883 0, 0, 0, 0,
50884 );
50885 let count = _mm_set_epi32(0, 0, 0, 2);
50886 let r = _mm512_sll_epi32(a, count);
50887 #[rustfmt::skip]
50888 let e = _mm512_set_epi32(
50889 0, 1 << 2, 1 << 3, 1 << 4,
50890 0, 0, 0, 0,
50891 0, 0, 0, 0,
50892 0, 0, 0, 0,
50893 );
50894 assert_eq_m512i(r, e);
50895 }
50896
50897 #[simd_test(enable = "avx512f")]
50898 unsafe fn test_mm512_mask_sll_epi32() {
50899 #[rustfmt::skip]
50900 let a = _mm512_set_epi32(
50901 1 << 31, 1 << 0, 1 << 1, 1 << 2,
50902 0, 0, 0, 0,
50903 0, 0, 0, 0,
50904 0, 0, 0, 0,
50905 );
50906 let count = _mm_set_epi32(0, 0, 0, 2);
50907 let r = _mm512_mask_sll_epi32(a, 0, a, count);
50908 assert_eq_m512i(r, a);
50909 let r = _mm512_mask_sll_epi32(a, 0b11111111_11111111, a, count);
50910 #[rustfmt::skip]
50911 let e = _mm512_set_epi32(
50912 0, 1 << 2, 1 << 3, 1 << 4,
50913 0, 0, 0, 0,
50914 0, 0, 0, 0,
50915 0, 0, 0, 0,
50916 );
50917 assert_eq_m512i(r, e);
50918 }
50919
50920 #[simd_test(enable = "avx512f")]
50921 unsafe fn test_mm512_maskz_sll_epi32() {
50922 #[rustfmt::skip]
50923 let a = _mm512_set_epi32(
50924 1 << 31, 1 << 0, 1 << 1, 1 << 2,
50925 0, 0, 0, 0,
50926 0, 0, 0, 0,
50927 0, 0, 0, 1 << 31,
50928 );
50929 let count = _mm_set_epi32(2, 0, 0, 2);
50930 let r = _mm512_maskz_sll_epi32(0, a, count);
50931 assert_eq_m512i(r, _mm512_setzero_si512());
50932 let r = _mm512_maskz_sll_epi32(0b00000000_11111111, a, count);
50933 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
50934 assert_eq_m512i(r, e);
50935 }
50936
50937 #[simd_test(enable = "avx512f,avx512vl")]
50938 unsafe fn test_mm256_mask_sll_epi32() {
50939 let a = _mm256_set_epi32(1 << 13, 0, 0, 0, 0, 0, 0, 0);
50940 let count = _mm_set_epi32(0, 0, 0, 1);
50941 let r = _mm256_mask_sll_epi32(a, 0, a, count);
50942 assert_eq_m256i(r, a);
50943 let r = _mm256_mask_sll_epi32(a, 0b11111111, a, count);
50944 let e = _mm256_set_epi32(1 << 14, 0, 0, 0, 0, 0, 0, 0);
50945 assert_eq_m256i(r, e);
50946 }
50947
50948 #[simd_test(enable = "avx512f,avx512vl")]
50949 unsafe fn test_mm256_maskz_sll_epi32() {
50950 let a = _mm256_set_epi32(1 << 13, 0, 0, 0, 0, 0, 0, 0);
50951 let count = _mm_set_epi32(0, 0, 0, 1);
50952 let r = _mm256_maskz_sll_epi32(0, a, count);
50953 assert_eq_m256i(r, _mm256_setzero_si256());
50954 let r = _mm256_maskz_sll_epi32(0b11111111, a, count);
50955 let e = _mm256_set_epi32(1 << 14, 0, 0, 0, 0, 0, 0, 0);
50956 assert_eq_m256i(r, e);
50957 }
50958
50959 #[simd_test(enable = "avx512f,avx512vl")]
50960 unsafe fn test_mm_mask_sll_epi32() {
50961 let a = _mm_set_epi32(1 << 13, 0, 0, 0);
50962 let count = _mm_set_epi32(0, 0, 0, 1);
50963 let r = _mm_mask_sll_epi32(a, 0, a, count);
50964 assert_eq_m128i(r, a);
50965 let r = _mm_mask_sll_epi32(a, 0b00001111, a, count);
50966 let e = _mm_set_epi32(1 << 14, 0, 0, 0);
50967 assert_eq_m128i(r, e);
50968 }
50969
50970 #[simd_test(enable = "avx512f,avx512vl")]
50971 unsafe fn test_mm_maskz_sll_epi32() {
50972 let a = _mm_set_epi32(1 << 13, 0, 0, 0);
50973 let count = _mm_set_epi32(0, 0, 0, 1);
50974 let r = _mm_maskz_sll_epi32(0, a, count);
50975 assert_eq_m128i(r, _mm_setzero_si128());
50976 let r = _mm_maskz_sll_epi32(0b00001111, a, count);
50977 let e = _mm_set_epi32(1 << 14, 0, 0, 0);
50978 assert_eq_m128i(r, e);
50979 }
50980
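    // _mm512_srl_epi32 uses the same convention: one logical right-shift count taken from
    // the low 64 bits of the __m128i count operand.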
50981 #[simd_test(enable = "avx512f")]
50982 unsafe fn test_mm512_srl_epi32() {
50983 #[rustfmt::skip]
50984 let a = _mm512_set_epi32(
50985 1 << 31, 1 << 0, 1 << 1, 1 << 2,
50986 0, 0, 0, 0,
50987 0, 0, 0, 0,
50988 0, 0, 0, 0,
50989 );
50990 let count = _mm_set_epi32(0, 0, 0, 2);
50991 let r = _mm512_srl_epi32(a, count);
50992 let e = _mm512_set_epi32(1 << 29, 0, 0, 1 << 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
50993 assert_eq_m512i(r, e);
50994 }
50995
50996 #[simd_test(enable = "avx512f")]
50997 unsafe fn test_mm512_mask_srl_epi32() {
50998 #[rustfmt::skip]
50999 let a = _mm512_set_epi32(
51000 1 << 31, 1 << 0, 1 << 1, 1 << 2,
51001 0, 0, 0, 0,
51002 0, 0, 0, 0,
51003 0, 0, 0, 0,
51004 );
51005 let count = _mm_set_epi32(0, 0, 0, 2);
51006 let r = _mm512_mask_srl_epi32(a, 0, a, count);
51007 assert_eq_m512i(r, a);
51008 let r = _mm512_mask_srl_epi32(a, 0b11111111_11111111, a, count);
51009 let e = _mm512_set_epi32(1 << 29, 0, 0, 1 << 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
51010 assert_eq_m512i(r, e);
51011 }
51012
51013 #[simd_test(enable = "avx512f")]
51014 unsafe fn test_mm512_maskz_srl_epi32() {
51015 #[rustfmt::skip]
51016 let a = _mm512_set_epi32(
51017 1 << 31, 1 << 0, 1 << 1, 1 << 2,
51018 0, 0, 0, 0,
51019 0, 0, 0, 0,
51020 0, 0, 0, 1 << 31,
51021 );
51022 let count = _mm_set_epi32(2, 0, 0, 2);
51023 let r = _mm512_maskz_srl_epi32(0, a, count);
51024 assert_eq_m512i(r, _mm512_setzero_si512());
51025 let r = _mm512_maskz_srl_epi32(0b00000000_11111111, a, count);
51026 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 29);
51027 assert_eq_m512i(r, e);
51028 }
51029
51030 #[simd_test(enable = "avx512f,avx512vl")]
51031 unsafe fn test_mm256_mask_srl_epi32() {
51032 let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
51033 let count = _mm_set_epi32(0, 0, 0, 1);
51034 let r = _mm256_mask_srl_epi32(a, 0, a, count);
51035 assert_eq_m256i(r, a);
51036 let r = _mm256_mask_srl_epi32(a, 0b11111111, a, count);
51037 let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
51038 assert_eq_m256i(r, e);
51039 }
51040
51041 #[simd_test(enable = "avx512f,avx512vl")]
51042 unsafe fn test_mm256_maskz_srl_epi32() {
51043 let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
51044 let count = _mm_set_epi32(0, 0, 0, 1);
51045 let r = _mm256_maskz_srl_epi32(0, a, count);
51046 assert_eq_m256i(r, _mm256_setzero_si256());
51047 let r = _mm256_maskz_srl_epi32(0b11111111, a, count);
51048 let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
51049 assert_eq_m256i(r, e);
51050 }
51051
51052 #[simd_test(enable = "avx512f,avx512vl")]
51053 unsafe fn test_mm_mask_srl_epi32() {
51054 let a = _mm_set_epi32(1 << 5, 0, 0, 0);
51055 let count = _mm_set_epi32(0, 0, 0, 1);
51056 let r = _mm_mask_srl_epi32(a, 0, a, count);
51057 assert_eq_m128i(r, a);
51058 let r = _mm_mask_srl_epi32(a, 0b00001111, a, count);
51059 let e = _mm_set_epi32(1 << 4, 0, 0, 0);
51060 assert_eq_m128i(r, e);
51061 }
51062
51063 #[simd_test(enable = "avx512f,avx512vl")]
51064 unsafe fn test_mm_maskz_srl_epi32() {
51065 let a = _mm_set_epi32(1 << 5, 0, 0, 0);
51066 let count = _mm_set_epi32(0, 0, 0, 1);
51067 let r = _mm_maskz_srl_epi32(0, a, count);
51068 assert_eq_m128i(r, _mm_setzero_si128());
51069 let r = _mm_maskz_srl_epi32(0b00001111, a, count);
51070 let e = _mm_set_epi32(1 << 4, 0, 0, 0);
51071 assert_eq_m128i(r, e);
51072 }
51073
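    // Arithmetic right shift: the sign bit is replicated, so negative values round toward
    // negative infinity (-15 >> 2 == -4). The count again comes from the low 64 bits of
    // the __m128i count operand.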
51074 #[simd_test(enable = "avx512f")]
51075 unsafe fn test_mm512_sra_epi32() {
51076 let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
51077 let count = _mm_set_epi32(1, 0, 0, 2);
51078 let r = _mm512_sra_epi32(a, count);
51079 let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
51080 assert_eq_m512i(r, e);
51081 }
51082
51083 #[simd_test(enable = "avx512f")]
51084 unsafe fn test_mm512_mask_sra_epi32() {
51085 let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16);
51086 let count = _mm_set_epi32(0, 0, 0, 2);
51087 let r = _mm512_mask_sra_epi32(a, 0, a, count);
51088 assert_eq_m512i(r, a);
51089 let r = _mm512_mask_sra_epi32(a, 0b11111111_11111111, a, count);
51090 let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4);
51091 assert_eq_m512i(r, e);
51092 }
51093
51094 #[simd_test(enable = "avx512f")]
51095 unsafe fn test_mm512_maskz_sra_epi32() {
51096 let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -15, -14);
51097 let count = _mm_set_epi32(2, 0, 0, 2);
51098 let r = _mm512_maskz_sra_epi32(0, a, count);
51099 assert_eq_m512i(r, _mm512_setzero_si512());
51100 let r = _mm512_maskz_sra_epi32(0b00000000_11111111, a, count);
51101 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -4);
51102 assert_eq_m512i(r, e);
51103 }
51104
51105 #[simd_test(enable = "avx512f,avx512vl")]
51106 unsafe fn test_mm256_mask_sra_epi32() {
51107 let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
51108 let count = _mm_set_epi32(0, 0, 0, 1);
51109 let r = _mm256_mask_sra_epi32(a, 0, a, count);
51110 assert_eq_m256i(r, a);
51111 let r = _mm256_mask_sra_epi32(a, 0b11111111, a, count);
51112 let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
51113 assert_eq_m256i(r, e);
51114 }
51115
51116 #[simd_test(enable = "avx512f,avx512vl")]
51117 unsafe fn test_mm256_maskz_sra_epi32() {
51118 let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
51119 let count = _mm_set_epi32(0, 0, 0, 1);
51120 let r = _mm256_maskz_sra_epi32(0, a, count);
51121 assert_eq_m256i(r, _mm256_setzero_si256());
51122 let r = _mm256_maskz_sra_epi32(0b11111111, a, count);
51123 let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
51124 assert_eq_m256i(r, e);
51125 }
51126
51127 #[simd_test(enable = "avx512f,avx512vl")]
51128 unsafe fn test_mm_mask_sra_epi32() {
51129 let a = _mm_set_epi32(1 << 5, 0, 0, 0);
51130 let count = _mm_set_epi32(0, 0, 0, 1);
51131 let r = _mm_mask_sra_epi32(a, 0, a, count);
51132 assert_eq_m128i(r, a);
51133 let r = _mm_mask_sra_epi32(a, 0b00001111, a, count);
51134 let e = _mm_set_epi32(1 << 4, 0, 0, 0);
51135 assert_eq_m128i(r, e);
51136 }
51137
51138 #[simd_test(enable = "avx512f,avx512vl")]
51139 unsafe fn test_mm_maskz_sra_epi32() {
51140 let a = _mm_set_epi32(1 << 5, 0, 0, 0);
51141 let count = _mm_set_epi32(0, 0, 0, 1);
51142 let r = _mm_maskz_sra_epi32(0, a, count);
51143 assert_eq_m128i(r, _mm_setzero_si128());
51144 let r = _mm_maskz_sra_epi32(0b00001111, a, count);
51145 let e = _mm_set_epi32(1 << 4, 0, 0, 0);
51146 assert_eq_m128i(r, e);
51147 }
51148
51149 #[simd_test(enable = "avx512f")]
51150 unsafe fn test_mm512_srav_epi32() {
51151 let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
51152 let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
51153 let r = _mm512_srav_epi32(a, count);
51154 let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
51155 assert_eq_m512i(r, e);
51156 }
51157
51158 #[simd_test(enable = "avx512f")]
51159 unsafe fn test_mm512_mask_srav_epi32() {
51160 let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16);
51161 let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
51162 let r = _mm512_mask_srav_epi32(a, 0, a, count);
51163 assert_eq_m512i(r, a);
51164 let r = _mm512_mask_srav_epi32(a, 0b11111111_11111111, a, count);
51165 let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8);
51166 assert_eq_m512i(r, e);
51167 }
51168
51169 #[simd_test(enable = "avx512f")]
51170 unsafe fn test_mm512_maskz_srav_epi32() {
51171 let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -15, -14);
51172 let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2);
51173 let r = _mm512_maskz_srav_epi32(0, a, count);
51174 assert_eq_m512i(r, _mm512_setzero_si512());
51175 let r = _mm512_maskz_srav_epi32(0b00000000_11111111, a, count);
51176 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -4);
51177 assert_eq_m512i(r, e);
51178 }
51179
51180 #[simd_test(enable = "avx512f,avx512vl")]
51181 unsafe fn test_mm256_mask_srav_epi32() {
51182 let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
51183 let count = _mm256_set1_epi32(1);
51184 let r = _mm256_mask_srav_epi32(a, 0, a, count);
51185 assert_eq_m256i(r, a);
51186 let r = _mm256_mask_srav_epi32(a, 0b11111111, a, count);
51187 let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
51188 assert_eq_m256i(r, e);
51189 }
51190
51191 #[simd_test(enable = "avx512f,avx512vl")]
51192 unsafe fn test_mm256_maskz_srav_epi32() {
51193 let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
51194 let count = _mm256_set1_epi32(1);
51195 let r = _mm256_maskz_srav_epi32(0, a, count);
51196 assert_eq_m256i(r, _mm256_setzero_si256());
51197 let r = _mm256_maskz_srav_epi32(0b11111111, a, count);
51198 let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
51199 assert_eq_m256i(r, e);
51200 }
51201
51202 #[simd_test(enable = "avx512f,avx512vl")]
51203 unsafe fn test_mm_mask_srav_epi32() {
51204 let a = _mm_set_epi32(1 << 5, 0, 0, 0);
51205 let count = _mm_set1_epi32(1);
51206 let r = _mm_mask_srav_epi32(a, 0, a, count);
51207 assert_eq_m128i(r, a);
51208 let r = _mm_mask_srav_epi32(a, 0b00001111, a, count);
51209 let e = _mm_set_epi32(1 << 4, 0, 0, 0);
51210 assert_eq_m128i(r, e);
51211 }
51212
51213 #[simd_test(enable = "avx512f,avx512vl")]
51214 unsafe fn test_mm_maskz_srav_epi32() {
51215 let a = _mm_set_epi32(1 << 5, 0, 0, 0);
51216 let count = _mm_set1_epi32(1);
51217 let r = _mm_maskz_srav_epi32(0, a, count);
51218 assert_eq_m128i(r, _mm_setzero_si128());
51219 let r = _mm_maskz_srav_epi32(0b00001111, a, count);
51220 let e = _mm_set_epi32(1 << 4, 0, 0, 0);
51221 assert_eq_m128i(r, e);
51222 }
51223
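    // Immediate arithmetic right shift: 15 >> 2 == 3 while -15 >> 2 == -4, because the
    // sign bit is shifted in.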
51224 #[simd_test(enable = "avx512f")]
51225 unsafe fn test_mm512_srai_epi32() {
51226 let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, -15);
51227 let r = _mm512_srai_epi32::<2>(a);
51228 let e = _mm512_set_epi32(2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, -4);
51229 assert_eq_m512i(r, e);
51230 }
51231
51232 #[simd_test(enable = "avx512f")]
51233 unsafe fn test_mm512_mask_srai_epi32() {
51234 let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, -15);
51235 let r = _mm512_mask_srai_epi32::<2>(a, 0, a);
51236 assert_eq_m512i(r, a);
51237 let r = _mm512_mask_srai_epi32::<2>(a, 0b11111111_11111111, a);
51238 let e = _mm512_set_epi32(2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, -4);
51239 assert_eq_m512i(r, e);
51240 }
51241
51242 #[simd_test(enable = "avx512f")]
51243 unsafe fn test_mm512_maskz_srai_epi32() {
51244 let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, -15);
51245 let r = _mm512_maskz_srai_epi32::<2>(0, a);
51246 assert_eq_m512i(r, _mm512_setzero_si512());
51247 let r = _mm512_maskz_srai_epi32::<2>(0b00000000_11111111, a);
51248 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, -4);
51249 assert_eq_m512i(r, e);
51250 }
51251
51252 #[simd_test(enable = "avx512f,avx512vl")]
51253 unsafe fn test_mm256_mask_srai_epi32() {
51254 let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
51255 let r = _mm256_mask_srai_epi32::<1>(a, 0, a);
51256 assert_eq_m256i(r, a);
51257 let r = _mm256_mask_srai_epi32::<1>(a, 0b11111111, a);
51258 let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
51259 assert_eq_m256i(r, e);
51260 }
51261
51262 #[simd_test(enable = "avx512f,avx512vl")]
51263 unsafe fn test_mm256_maskz_srai_epi32() {
51264 let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
51265 let r = _mm256_maskz_srai_epi32::<1>(0, a);
51266 assert_eq_m256i(r, _mm256_setzero_si256());
51267 let r = _mm256_maskz_srai_epi32::<1>(0b11111111, a);
51268 let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
51269 assert_eq_m256i(r, e);
51270 }
51271
51272 #[simd_test(enable = "avx512f,avx512vl")]
51273 unsafe fn test_mm_mask_srai_epi32() {
51274 let a = _mm_set_epi32(1 << 5, 0, 0, 0);
51275 let r = _mm_mask_srai_epi32::<1>(a, 0, a);
51276 assert_eq_m128i(r, a);
51277 let r = _mm_mask_srai_epi32::<1>(a, 0b00001111, a);
51278 let e = _mm_set_epi32(1 << 4, 0, 0, 0);
51279 assert_eq_m128i(r, e);
51280 }
51281
51282 #[simd_test(enable = "avx512f,avx512vl")]
51283 unsafe fn test_mm_maskz_srai_epi32() {
51284 let a = _mm_set_epi32(1 << 5, 0, 0, 0);
51285 let r = _mm_maskz_srai_epi32::<1>(0, a);
51286 assert_eq_m128i(r, _mm_setzero_si128());
51287 let r = _mm_maskz_srai_epi32::<1>(0b00001111, a);
51288 let e = _mm_set_epi32(1 << 4, 0, 0, 0);
51289 assert_eq_m128i(r, e);
51290 }
51291
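    // _mm512_permute_ps applies the same 4-element selection (two bits per destination
    // element) to every 128-bit lane; 0b11_11_11_11 broadcasts element 3 of each lane.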
51292 #[simd_test(enable = "avx512f")]
51293 unsafe fn test_mm512_permute_ps() {
51294 let a = _mm512_setr_ps(
51295 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
51296 );
51297 let r = _mm512_permute_ps::<0b11_11_11_11>(a);
51298 let e = _mm512_setr_ps(
51299 3., 3., 3., 3., 7., 7., 7., 7., 11., 11., 11., 11., 15., 15., 15., 15.,
51300 );
51301 assert_eq_m512(r, e);
51302 }
51303
51304 #[simd_test(enable = "avx512f")]
51305 unsafe fn test_mm512_mask_permute_ps() {
51306 let a = _mm512_setr_ps(
51307 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
51308 );
51309 let r = _mm512_mask_permute_ps::<0b11_11_11_11>(a, 0, a);
51310 assert_eq_m512(r, a);
51311 let r = _mm512_mask_permute_ps::<0b11_11_11_11>(a, 0b11111111_11111111, a);
51312 let e = _mm512_setr_ps(
51313 3., 3., 3., 3., 7., 7., 7., 7., 11., 11., 11., 11., 15., 15., 15., 15.,
51314 );
51315 assert_eq_m512(r, e);
51316 }
51317
51318 #[simd_test(enable = "avx512f")]
51319 unsafe fn test_mm512_maskz_permute_ps() {
51320 let a = _mm512_setr_ps(
51321 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
51322 );
51323 let r = _mm512_maskz_permute_ps::<0b11_11_11_11>(0, a);
51324 assert_eq_m512(r, _mm512_setzero_ps());
51325 let r = _mm512_maskz_permute_ps::<0b11_11_11_11>(0b11111111_11111111, a);
51326 let e = _mm512_setr_ps(
51327 3., 3., 3., 3., 7., 7., 7., 7., 11., 11., 11., 11., 15., 15., 15., 15.,
51328 );
51329 assert_eq_m512(r, e);
51330 }
51331
51332 #[simd_test(enable = "avx512f,avx512vl")]
51333 unsafe fn test_mm256_mask_permute_ps() {
51334 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
51335 let r = _mm256_mask_permute_ps::<0b11_11_11_11>(a, 0, a);
51336 assert_eq_m256(r, a);
51337 let r = _mm256_mask_permute_ps::<0b11_11_11_11>(a, 0b11111111, a);
51338 let e = _mm256_set_ps(0., 0., 0., 0., 4., 4., 4., 4.);
51339 assert_eq_m256(r, e);
51340 }
51341
51342 #[simd_test(enable = "avx512f,avx512vl")]
51343 unsafe fn test_mm256_maskz_permute_ps() {
51344 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
51345 let r = _mm256_maskz_permute_ps::<0b11_11_11_11>(0, a);
51346 assert_eq_m256(r, _mm256_setzero_ps());
51347 let r = _mm256_maskz_permute_ps::<0b11_11_11_11>(0b11111111, a);
51348 let e = _mm256_set_ps(0., 0., 0., 0., 4., 4., 4., 4.);
51349 assert_eq_m256(r, e);
51350 }
51351
51352 #[simd_test(enable = "avx512f,avx512vl")]
51353 unsafe fn test_mm_mask_permute_ps() {
51354 let a = _mm_set_ps(0., 1., 2., 3.);
51355 let r = _mm_mask_permute_ps::<0b11_11_11_11>(a, 0, a);
51356 assert_eq_m128(r, a);
51357 let r = _mm_mask_permute_ps::<0b11_11_11_11>(a, 0b00001111, a);
51358 let e = _mm_set_ps(0., 0., 0., 0.);
51359 assert_eq_m128(r, e);
51360 }
51361
51362 #[simd_test(enable = "avx512f,avx512vl")]
51363 unsafe fn test_mm_maskz_permute_ps() {
51364 let a = _mm_set_ps(0., 1., 2., 3.);
51365 let r = _mm_maskz_permute_ps::<0b11_11_11_11>(0, a);
51366 assert_eq_m128(r, _mm_setzero_ps());
51367 let r = _mm_maskz_permute_ps::<0b11_11_11_11>(0b00001111, a);
51368 let e = _mm_set_ps(0., 0., 0., 0.);
51369 assert_eq_m128(r, e);
51370 }
51371
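    // Despite the "permutevar" name, _mm512_permutevar_epi32 indexes across the whole
    // 512-bit vector (like permutexvar); with idx == 1 every result element is a[1] == 14,
    // since _mm512_set_epi32 lists elements from highest to lowest.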
51372 #[simd_test(enable = "avx512f")]
51373 unsafe fn test_mm512_permutevar_epi32() {
51374 let idx = _mm512_set1_epi32(1);
51375 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
51376 let r = _mm512_permutevar_epi32(idx, a);
51377 let e = _mm512_set1_epi32(14);
51378 assert_eq_m512i(r, e);
51379 }
51380
51381 #[simd_test(enable = "avx512f")]
51382 unsafe fn test_mm512_mask_permutevar_epi32() {
51383 let idx = _mm512_set1_epi32(1);
51384 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
51385 let r = _mm512_mask_permutevar_epi32(a, 0, idx, a);
51386 assert_eq_m512i(r, a);
51387 let r = _mm512_mask_permutevar_epi32(a, 0b11111111_11111111, idx, a);
51388 let e = _mm512_set1_epi32(14);
51389 assert_eq_m512i(r, e);
51390 }
51391
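    // _mm512_permutevar_ps, by contrast, selects within each 128-bit lane, using only the
    // low 2 bits of each 32-bit control element.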
51392 #[simd_test(enable = "avx512f")]
51393 unsafe fn test_mm512_permutevar_ps() {
51394 let a = _mm512_set_ps(
51395 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
51396 );
51397 let b = _mm512_set1_epi32(0b01);
51398 let r = _mm512_permutevar_ps(a, b);
51399 let e = _mm512_set_ps(
51400 2., 2., 2., 2., 6., 6., 6., 6., 10., 10., 10., 10., 14., 14., 14., 14.,
51401 );
51402 assert_eq_m512(r, e);
51403 }
51404
51405 #[simd_test(enable = "avx512f")]
51406 unsafe fn test_mm512_mask_permutevar_ps() {
51407 let a = _mm512_set_ps(
51408 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
51409 );
51410 let b = _mm512_set1_epi32(0b01);
51411 let r = _mm512_mask_permutevar_ps(a, 0, a, b);
51412 assert_eq_m512(r, a);
51413 let r = _mm512_mask_permutevar_ps(a, 0b11111111_11111111, a, b);
51414 let e = _mm512_set_ps(
51415 2., 2., 2., 2., 6., 6., 6., 6., 10., 10., 10., 10., 14., 14., 14., 14.,
51416 );
51417 assert_eq_m512(r, e);
51418 }
51419
51420 #[simd_test(enable = "avx512f")]
51421 unsafe fn test_mm512_maskz_permutevar_ps() {
51422 let a = _mm512_set_ps(
51423 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
51424 );
51425 let b = _mm512_set1_epi32(0b01);
51426 let r = _mm512_maskz_permutevar_ps(0, a, b);
51427 assert_eq_m512(r, _mm512_setzero_ps());
51428 let r = _mm512_maskz_permutevar_ps(0b00000000_11111111, a, b);
51429 let e = _mm512_set_ps(
51430 0., 0., 0., 0., 0., 0., 0., 0., 10., 10., 10., 10., 14., 14., 14., 14.,
51431 );
51432 assert_eq_m512(r, e);
51433 }
51434
51435 #[simd_test(enable = "avx512f,avx512vl")]
51436 unsafe fn test_mm256_mask_permutevar_ps() {
51437 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
51438 let b = _mm256_set1_epi32(0b01);
51439 let r = _mm256_mask_permutevar_ps(a, 0, a, b);
51440 assert_eq_m256(r, a);
51441 let r = _mm256_mask_permutevar_ps(a, 0b11111111, a, b);
51442 let e = _mm256_set_ps(2., 2., 2., 2., 6., 6., 6., 6.);
51443 assert_eq_m256(r, e);
51444 }
51445
51446 #[simd_test(enable = "avx512f,avx512vl")]
51447 unsafe fn test_mm256_maskz_permutevar_ps() {
51448 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
51449 let b = _mm256_set1_epi32(0b01);
51450 let r = _mm256_maskz_permutevar_ps(0, a, b);
51451 assert_eq_m256(r, _mm256_setzero_ps());
51452 let r = _mm256_maskz_permutevar_ps(0b11111111, a, b);
51453 let e = _mm256_set_ps(2., 2., 2., 2., 6., 6., 6., 6.);
51454 assert_eq_m256(r, e);
51455 }
51456
51457 #[simd_test(enable = "avx512f,avx512vl")]
51458 unsafe fn test_mm_mask_permutevar_ps() {
51459 let a = _mm_set_ps(0., 1., 2., 3.);
51460 let b = _mm_set1_epi32(0b01);
51461 let r = _mm_mask_permutevar_ps(a, 0, a, b);
51462 assert_eq_m128(r, a);
51463 let r = _mm_mask_permutevar_ps(a, 0b00001111, a, b);
51464 let e = _mm_set_ps(2., 2., 2., 2.);
51465 assert_eq_m128(r, e);
51466 }
51467
51468 #[simd_test(enable = "avx512f,avx512vl")]
51469 unsafe fn test_mm_maskz_permutevar_ps() {
51470 let a = _mm_set_ps(0., 1., 2., 3.);
51471 let b = _mm_set1_epi32(0b01);
51472 let r = _mm_maskz_permutevar_ps(0, a, b);
51473 assert_eq_m128(r, _mm_setzero_ps());
51474 let r = _mm_maskz_permutevar_ps(0b00001111, a, b);
51475 let e = _mm_set_ps(2., 2., 2., 2.);
51476 assert_eq_m128(r, e);
51477 }
51478
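    // permutexvar: each index selects any of the 16 elements of the full vector.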
51479 #[simd_test(enable = "avx512f")]
51480 unsafe fn test_mm512_permutexvar_epi32() {
51481 let idx = _mm512_set1_epi32(1);
51482 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
51483 let r = _mm512_permutexvar_epi32(idx, a);
51484 let e = _mm512_set1_epi32(14);
51485 assert_eq_m512i(r, e);
51486 }
51487
51488 #[simd_test(enable = "avx512f")]
51489 unsafe fn test_mm512_mask_permutexvar_epi32() {
51490 let idx = _mm512_set1_epi32(1);
51491 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
51492 let r = _mm512_mask_permutexvar_epi32(a, 0, idx, a);
51493 assert_eq_m512i(r, a);
51494 let r = _mm512_mask_permutexvar_epi32(a, 0b11111111_11111111, idx, a);
51495 let e = _mm512_set1_epi32(14);
51496 assert_eq_m512i(r, e);
51497 }
51498
51499 #[simd_test(enable = "avx512f")]
51500 unsafe fn test_mm512_maskz_permutexvar_epi32() {
51501 let idx = _mm512_set1_epi32(1);
51502 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
51503 let r = _mm512_maskz_permutexvar_epi32(0, idx, a);
51504 assert_eq_m512i(r, _mm512_setzero_si512());
51505 let r = _mm512_maskz_permutexvar_epi32(0b00000000_11111111, idx, a);
51506 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14);
51507 assert_eq_m512i(r, e);
51508 }
51509
51510 #[simd_test(enable = "avx512f,avx512vl")]
51511 unsafe fn test_mm256_permutexvar_epi32() {
51512 let idx = _mm256_set1_epi32(1);
51513 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
51514 let r = _mm256_permutexvar_epi32(idx, a);
51515 let e = _mm256_set1_epi32(6);
51516 assert_eq_m256i(r, e);
51517 }
51518
51519 #[simd_test(enable = "avx512f,avx512vl")]
51520 unsafe fn test_mm256_mask_permutexvar_epi32() {
51521 let idx = _mm256_set1_epi32(1);
51522 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
51523 let r = _mm256_mask_permutexvar_epi32(a, 0, idx, a);
51524 assert_eq_m256i(r, a);
51525 let r = _mm256_mask_permutexvar_epi32(a, 0b11111111, idx, a);
51526 let e = _mm256_set1_epi32(6);
51527 assert_eq_m256i(r, e);
51528 }
51529
51530 #[simd_test(enable = "avx512f,avx512vl")]
51531 unsafe fn test_mm256_maskz_permutexvar_epi32() {
51532 let idx = _mm256_set1_epi32(1);
51533 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
51534 let r = _mm256_maskz_permutexvar_epi32(0, idx, a);
51535 assert_eq_m256i(r, _mm256_setzero_si256());
51536 let r = _mm256_maskz_permutexvar_epi32(0b11111111, idx, a);
51537 let e = _mm256_set1_epi32(6);
51538 assert_eq_m256i(r, e);
51539 }
51540
51541 #[simd_test(enable = "avx512f")]
51542 unsafe fn test_mm512_permutexvar_ps() {
51543 let idx = _mm512_set1_epi32(1);
51544 let a = _mm512_set_ps(
51545 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
51546 );
51547 let r = _mm512_permutexvar_ps(idx, a);
51548 let e = _mm512_set1_ps(14.);
51549 assert_eq_m512(r, e);
51550 }
51551
51552 #[simd_test(enable = "avx512f")]
51553 unsafe fn test_mm512_mask_permutexvar_ps() {
51554 let idx = _mm512_set1_epi32(1);
51555 let a = _mm512_set_ps(
51556 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
51557 );
51558 let r = _mm512_mask_permutexvar_ps(a, 0, idx, a);
51559 assert_eq_m512(r, a);
51560 let r = _mm512_mask_permutexvar_ps(a, 0b11111111_11111111, idx, a);
51561 let e = _mm512_set1_ps(14.);
51562 assert_eq_m512(r, e);
51563 }
51564
51565 #[simd_test(enable = "avx512f")]
51566 unsafe fn test_mm512_maskz_permutexvar_ps() {
51567 let idx = _mm512_set1_epi32(1);
51568 let a = _mm512_set_ps(
51569 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
51570 );
51571 let r = _mm512_maskz_permutexvar_ps(0, idx, a);
51572 assert_eq_m512(r, _mm512_setzero_ps());
51573 let r = _mm512_maskz_permutexvar_ps(0b00000000_11111111, idx, a);
51574 let e = _mm512_set_ps(
51575 0., 0., 0., 0., 0., 0., 0., 0., 14., 14., 14., 14., 14., 14., 14., 14.,
51576 );
51577 assert_eq_m512(r, e);
51578 }
51579
51580 #[simd_test(enable = "avx512f,avx512vl")]
51581 unsafe fn test_mm256_permutexvar_ps() {
51582 let idx = _mm256_set1_epi32(1);
51583 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
51584 let r = _mm256_permutexvar_ps(idx, a);
51585 let e = _mm256_set1_ps(6.);
51586 assert_eq_m256(r, e);
51587 }
51588
51589 #[simd_test(enable = "avx512f,avx512vl")]
51590 unsafe fn test_mm256_mask_permutexvar_ps() {
51591 let idx = _mm256_set1_epi32(1);
51592 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
51593 let r = _mm256_mask_permutexvar_ps(a, 0, idx, a);
51594 assert_eq_m256(r, a);
51595 let r = _mm256_mask_permutexvar_ps(a, 0b11111111, idx, a);
51596 let e = _mm256_set1_ps(6.);
51597 assert_eq_m256(r, e);
51598 }
51599
51600 #[simd_test(enable = "avx512f,avx512vl")]
51601 unsafe fn test_mm256_maskz_permutexvar_ps() {
51602 let idx = _mm256_set1_epi32(1);
51603 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
51604 let r = _mm256_maskz_permutexvar_ps(0, idx, a);
51605 assert_eq_m256(r, _mm256_setzero_ps());
51606 let r = _mm256_maskz_permutexvar_ps(0b11111111, idx, a);
51607 let e = _mm256_set1_ps(6.);
51608 assert_eq_m256(r, e);
51609 }
51610
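    // permutex2var treats a and b as one 32-element table: the low 4 bits of each index
    // pick the element and bit 4 (1 << 4) switches from a to b. The narrower forms below
    // use 1 << 3 (256-bit) and 1 << 2 (128-bit) as the selector bit.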
51611 #[simd_test(enable = "avx512f")]
51612 unsafe fn test_mm512_permutex2var_epi32() {
51613 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
51614 #[rustfmt::skip]
51615 let idx = _mm512_set_epi32(
51616 1, 1 << 4, 2, 1 << 4,
51617 3, 1 << 4, 4, 1 << 4,
51618 5, 1 << 4, 6, 1 << 4,
51619 7, 1 << 4, 8, 1 << 4,
51620 );
51621 let b = _mm512_set1_epi32(100);
51622 let r = _mm512_permutex2var_epi32(a, idx, b);
51623 let e = _mm512_set_epi32(
51624 14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
51625 );
51626 assert_eq_m512i(r, e);
51627 }
51628
51629 #[simd_test(enable = "avx512f")]
51630 unsafe fn test_mm512_mask_permutex2var_epi32() {
51631 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
51632 #[rustfmt::skip]
51633 let idx = _mm512_set_epi32(
51634 1, 1 << 4, 2, 1 << 4,
51635 3, 1 << 4, 4, 1 << 4,
51636 5, 1 << 4, 6, 1 << 4,
51637 7, 1 << 4, 8, 1 << 4,
51638 );
51639 let b = _mm512_set1_epi32(100);
51640 let r = _mm512_mask_permutex2var_epi32(a, 0, idx, b);
51641 assert_eq_m512i(r, a);
51642 let r = _mm512_mask_permutex2var_epi32(a, 0b11111111_11111111, idx, b);
51643 let e = _mm512_set_epi32(
51644 14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
51645 );
51646 assert_eq_m512i(r, e);
51647 }
51648
51649 #[simd_test(enable = "avx512f")]
51650 unsafe fn test_mm512_maskz_permutex2var_epi32() {
51651 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
51652 #[rustfmt::skip]
51653 let idx = _mm512_set_epi32(
51654 1, 1 << 4, 2, 1 << 4,
51655 3, 1 << 4, 4, 1 << 4,
51656 5, 1 << 4, 6, 1 << 4,
51657 7, 1 << 4, 8, 1 << 4,
51658 );
51659 let b = _mm512_set1_epi32(100);
51660 let r = _mm512_maskz_permutex2var_epi32(0, a, idx, b);
51661 assert_eq_m512i(r, _mm512_setzero_si512());
51662 let r = _mm512_maskz_permutex2var_epi32(0b00000000_11111111, a, idx, b);
51663 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 10, 100, 9, 100, 8, 100, 7, 100);
51664 assert_eq_m512i(r, e);
51665 }
51666
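    // The mask2 variant copies the corresponding element of idx (not a) wherever the mask
    // bit is clear, which is why an all-zero mask returns idx itself and the out-of-range
    // 1000..4000 indices survive in the masked-off upper half.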
51667 #[simd_test(enable = "avx512f")]
51668 unsafe fn test_mm512_mask2_permutex2var_epi32() {
51669 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
51670 #[rustfmt::skip]
51671 let idx = _mm512_set_epi32(
51672 1000, 1 << 4, 2000, 1 << 4,
51673 3000, 1 << 4, 4000, 1 << 4,
51674 5, 1 << 4, 6, 1 << 4,
51675 7, 1 << 4, 8, 1 << 4,
51676 );
51677 let b = _mm512_set1_epi32(100);
51678 let r = _mm512_mask2_permutex2var_epi32(a, idx, 0, b);
51679 assert_eq_m512i(r, idx);
51680 let r = _mm512_mask2_permutex2var_epi32(a, idx, 0b00000000_11111111, b);
51681 #[rustfmt::skip]
51682 let e = _mm512_set_epi32(
51683 1000, 1 << 4, 2000, 1 << 4,
51684 3000, 1 << 4, 4000, 1 << 4,
51685 10, 100, 9, 100,
51686 8, 100, 7, 100,
51687 );
51688 assert_eq_m512i(r, e);
51689 }
51690
51691 #[simd_test(enable = "avx512f,avx512vl")]
51692 unsafe fn test_mm256_permutex2var_epi32() {
51693 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
51694 let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
51695 let b = _mm256_set1_epi32(100);
51696 let r = _mm256_permutex2var_epi32(a, idx, b);
51697 let e = _mm256_set_epi32(6, 100, 5, 100, 4, 100, 3, 100);
51698 assert_eq_m256i(r, e);
51699 }
51700
51701 #[simd_test(enable = "avx512f,avx512vl")]
51702 unsafe fn test_mm256_mask_permutex2var_epi32() {
51703 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
51704 let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
51705 let b = _mm256_set1_epi32(100);
51706 let r = _mm256_mask_permutex2var_epi32(a, 0, idx, b);
51707 assert_eq_m256i(r, a);
51708 let r = _mm256_mask_permutex2var_epi32(a, 0b11111111, idx, b);
51709 let e = _mm256_set_epi32(6, 100, 5, 100, 4, 100, 3, 100);
51710 assert_eq_m256i(r, e);
51711 }
51712
51713 #[simd_test(enable = "avx512f,avx512vl")]
51714 unsafe fn test_mm256_maskz_permutex2var_epi32() {
51715 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
51716 let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
51717 let b = _mm256_set1_epi32(100);
51718 let r = _mm256_maskz_permutex2var_epi32(0, a, idx, b);
51719 assert_eq_m256i(r, _mm256_setzero_si256());
51720 let r = _mm256_maskz_permutex2var_epi32(0b11111111, a, idx, b);
51721 let e = _mm256_set_epi32(6, 100, 5, 100, 4, 100, 3, 100);
51722 assert_eq_m256i(r, e);
51723 }
51724
51725 #[simd_test(enable = "avx512f,avx512vl")]
51726 unsafe fn test_mm256_mask2_permutex2var_epi32() {
51727 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
51728 let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
51729 let b = _mm256_set1_epi32(100);
51730 let r = _mm256_mask2_permutex2var_epi32(a, idx, 0, b);
51731 assert_eq_m256i(r, idx);
51732 let r = _mm256_mask2_permutex2var_epi32(a, idx, 0b11111111, b);
51733 let e = _mm256_set_epi32(6, 100, 5, 100, 4, 100, 3, 100);
51734 assert_eq_m256i(r, e);
51735 }
51736
51737 #[simd_test(enable = "avx512f,avx512vl")]
51738 unsafe fn test_mm_permutex2var_epi32() {
51739 let a = _mm_set_epi32(0, 1, 2, 3);
51740 let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
51741 let b = _mm_set1_epi32(100);
51742 let r = _mm_permutex2var_epi32(a, idx, b);
51743 let e = _mm_set_epi32(2, 100, 1, 100);
51744 assert_eq_m128i(r, e);
51745 }
51746
51747 #[simd_test(enable = "avx512f,avx512vl")]
51748 unsafe fn test_mm_mask_permutex2var_epi32() {
51749 let a = _mm_set_epi32(0, 1, 2, 3);
51750 let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
51751 let b = _mm_set1_epi32(100);
51752 let r = _mm_mask_permutex2var_epi32(a, 0, idx, b);
51753 assert_eq_m128i(r, a);
51754 let r = _mm_mask_permutex2var_epi32(a, 0b00001111, idx, b);
51755 let e = _mm_set_epi32(2, 100, 1, 100);
51756 assert_eq_m128i(r, e);
51757 }
51758
51759 #[simd_test(enable = "avx512f,avx512vl")]
51760 unsafe fn test_mm_maskz_permutex2var_epi32() {
51761 let a = _mm_set_epi32(0, 1, 2, 3);
51762 let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
51763 let b = _mm_set1_epi32(100);
51764 let r = _mm_maskz_permutex2var_epi32(0, a, idx, b);
51765 assert_eq_m128i(r, _mm_setzero_si128());
51766 let r = _mm_maskz_permutex2var_epi32(0b00001111, a, idx, b);
51767 let e = _mm_set_epi32(2, 100, 1, 100);
51768 assert_eq_m128i(r, e);
51769 }
51770
51771 #[simd_test(enable = "avx512f,avx512vl")]
51772 unsafe fn test_mm_mask2_permutex2var_epi32() {
51773 let a = _mm_set_epi32(0, 1, 2, 3);
51774 let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
51775 let b = _mm_set1_epi32(100);
51776 let r = _mm_mask2_permutex2var_epi32(a, idx, 0, b);
51777 assert_eq_m128i(r, idx);
51778 let r = _mm_mask2_permutex2var_epi32(a, idx, 0b00001111, b);
51779 let e = _mm_set_epi32(2, 100, 1, 100);
51780 assert_eq_m128i(r, e);
51781 }
51782
51783 #[simd_test(enable = "avx512f")]
51784 unsafe fn test_mm512_permutex2var_ps() {
51785 let a = _mm512_set_ps(
51786 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
51787 );
51788 #[rustfmt::skip]
51789 let idx = _mm512_set_epi32(
51790 1, 1 << 4, 2, 1 << 4,
51791 3, 1 << 4, 4, 1 << 4,
51792 5, 1 << 4, 6, 1 << 4,
51793 7, 1 << 4, 8, 1 << 4,
51794 );
51795 let b = _mm512_set1_ps(100.);
51796 let r = _mm512_permutex2var_ps(a, idx, b);
51797 let e = _mm512_set_ps(
51798 14., 100., 13., 100., 12., 100., 11., 100., 10., 100., 9., 100., 8., 100., 7., 100.,
51799 );
51800 assert_eq_m512(r, e);
51801 }
51802
51803 #[simd_test(enable = "avx512f")]
51804 unsafe fn test_mm512_mask_permutex2var_ps() {
51805 let a = _mm512_set_ps(
51806 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
51807 );
51808 #[rustfmt::skip]
51809 let idx = _mm512_set_epi32(
51810 1, 1 << 4, 2, 1 << 4,
51811 3, 1 << 4, 4, 1 << 4,
51812 5, 1 << 4, 6, 1 << 4,
51813 7, 1 << 4, 8, 1 << 4,
51814 );
51815 let b = _mm512_set1_ps(100.);
51816 let r = _mm512_mask_permutex2var_ps(a, 0, idx, b);
51817 assert_eq_m512(r, a);
51818 let r = _mm512_mask_permutex2var_ps(a, 0b11111111_11111111, idx, b);
51819 let e = _mm512_set_ps(
51820 14., 100., 13., 100., 12., 100., 11., 100., 10., 100., 9., 100., 8., 100., 7., 100.,
51821 );
51822 assert_eq_m512(r, e);
51823 }
51824
51825 #[simd_test(enable = "avx512f")]
51826 unsafe fn test_mm512_maskz_permutex2var_ps() {
51827 let a = _mm512_set_ps(
51828 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
51829 );
51830 #[rustfmt::skip]
51831 let idx = _mm512_set_epi32(
51832 1, 1 << 4, 2, 1 << 4,
51833 3, 1 << 4, 4, 1 << 4,
51834 5, 1 << 4, 6, 1 << 4,
51835 7, 1 << 4, 8, 1 << 4,
51836 );
51837 let b = _mm512_set1_ps(100.);
51838 let r = _mm512_maskz_permutex2var_ps(0, a, idx, b);
51839 assert_eq_m512(r, _mm512_setzero_ps());
51840 let r = _mm512_maskz_permutex2var_ps(0b00000000_11111111, a, idx, b);
51841 let e = _mm512_set_ps(
51842 0., 0., 0., 0., 0., 0., 0., 0., 10., 100., 9., 100., 8., 100., 7., 100.,
51843 );
51844 assert_eq_m512(r, e);
51845 }
51846
51847 #[simd_test(enable = "avx512f")]
51848 unsafe fn test_mm512_mask2_permutex2var_ps() {
51849 let a = _mm512_set_ps(
51850 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
51851 );
51852 #[rustfmt::skip]
51853 let idx = _mm512_set_epi32(
51854 1, 1 << 4, 2, 1 << 4,
51855 3, 1 << 4, 4, 1 << 4,
51856 5, 1 << 4, 6, 1 << 4,
51857 7, 1 << 4, 8, 1 << 4,
51858 );
51859 let b = _mm512_set1_ps(100.);
51860 let r = _mm512_mask2_permutex2var_ps(a, idx, 0, b);
51861 assert_eq_m512(r, _mm512_castsi512_ps(idx));
51862 let r = _mm512_mask2_permutex2var_ps(a, idx, 0b11111111_11111111, b);
51863 let e = _mm512_set_ps(
51864 14., 100., 13., 100., 12., 100., 11., 100., 10., 100., 9., 100., 8., 100., 7., 100.,
51865 );
51866 assert_eq_m512(r, e);
51867 }
51868
51869 #[simd_test(enable = "avx512f,avx512vl")]
51870 unsafe fn test_mm256_permutex2var_ps() {
51871 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
51872 let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
51873 let b = _mm256_set1_ps(100.);
51874 let r = _mm256_permutex2var_ps(a, idx, b);
51875 let e = _mm256_set_ps(6., 100., 5., 100., 4., 100., 3., 100.);
51876 assert_eq_m256(r, e);
51877 }
51878
51879 #[simd_test(enable = "avx512f,avx512vl")]
51880 unsafe fn test_mm256_mask_permutex2var_ps() {
51881 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
51882 let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
51883 let b = _mm256_set1_ps(100.);
51884 let r = _mm256_mask_permutex2var_ps(a, 0, idx, b);
51885 assert_eq_m256(r, a);
51886 let r = _mm256_mask_permutex2var_ps(a, 0b11111111, idx, b);
51887 let e = _mm256_set_ps(6., 100., 5., 100., 4., 100., 3., 100.);
51888 assert_eq_m256(r, e);
51889 }
51890
51891 #[simd_test(enable = "avx512f,avx512vl")]
51892 unsafe fn test_mm256_maskz_permutex2var_ps() {
51893 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
51894 let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
51895 let b = _mm256_set1_ps(100.);
51896 let r = _mm256_maskz_permutex2var_ps(0, a, idx, b);
51897 assert_eq_m256(r, _mm256_setzero_ps());
51898 let r = _mm256_maskz_permutex2var_ps(0b11111111, a, idx, b);
51899 let e = _mm256_set_ps(6., 100., 5., 100., 4., 100., 3., 100.);
51900 assert_eq_m256(r, e);
51901 }
51902
51903 #[simd_test(enable = "avx512f,avx512vl")]
51904 unsafe fn test_mm256_mask2_permutex2var_ps() {
51905 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
51906 let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
51907 let b = _mm256_set1_ps(100.);
51908 let r = _mm256_mask2_permutex2var_ps(a, idx, 0, b);
51909 assert_eq_m256(r, _mm256_castsi256_ps(idx));
51910 let r = _mm256_mask2_permutex2var_ps(a, idx, 0b11111111, b);
51911 let e = _mm256_set_ps(6., 100., 5., 100., 4., 100., 3., 100.);
51912 assert_eq_m256(r, e);
51913 }
51914
51915 #[simd_test(enable = "avx512f,avx512vl")]
51916 unsafe fn test_mm_permutex2var_ps() {
51917 let a = _mm_set_ps(0., 1., 2., 3.);
51918 let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
51919 let b = _mm_set1_ps(100.);
51920 let r = _mm_permutex2var_ps(a, idx, b);
51921 let e = _mm_set_ps(2., 100., 1., 100.);
51922 assert_eq_m128(r, e);
51923 }
51924
51925 #[simd_test(enable = "avx512f,avx512vl")]
51926 unsafe fn test_mm_mask_permutex2var_ps() {
51927 let a = _mm_set_ps(0., 1., 2., 3.);
51928 let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
51929 let b = _mm_set1_ps(100.);
51930 let r = _mm_mask_permutex2var_ps(a, 0, idx, b);
51931 assert_eq_m128(r, a);
51932 let r = _mm_mask_permutex2var_ps(a, 0b00001111, idx, b);
51933 let e = _mm_set_ps(2., 100., 1., 100.);
51934 assert_eq_m128(r, e);
51935 }
51936
51937 #[simd_test(enable = "avx512f,avx512vl")]
51938 unsafe fn test_mm_maskz_permutex2var_ps() {
51939 let a = _mm_set_ps(0., 1., 2., 3.);
51940 let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
51941 let b = _mm_set1_ps(100.);
51942 let r = _mm_maskz_permutex2var_ps(0, a, idx, b);
51943 assert_eq_m128(r, _mm_setzero_ps());
51944 let r = _mm_maskz_permutex2var_ps(0b00001111, a, idx, b);
51945 let e = _mm_set_ps(2., 100., 1., 100.);
51946 assert_eq_m128(r, e);
51947 }
51948
51949 #[simd_test(enable = "avx512f,avx512vl")]
51950 unsafe fn test_mm_mask2_permutex2var_ps() {
51951 let a = _mm_set_ps(0., 1., 2., 3.);
51952 let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
51953 let b = _mm_set1_ps(100.);
51954 let r = _mm_mask2_permutex2var_ps(a, idx, 0, b);
51955 assert_eq_m128(r, _mm_castsi128_ps(idx));
51956 let r = _mm_mask2_permutex2var_ps(a, idx, 0b00001111, b);
51957 let e = _mm_set_ps(2., 100., 1., 100.);
51958 assert_eq_m128(r, e);
51959 }
51960
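    // _MM_PERM_AADD encodes 0b00_00_11_11: within each 128-bit lane, element 3 is copied
    // into result positions 0 and 1 and element 0 into positions 2 and 3.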
51961 #[simd_test(enable = "avx512f")]
51962 unsafe fn test_mm512_shuffle_epi32() {
51963 let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
51964 let r = _mm512_shuffle_epi32::<_MM_PERM_AADD>(a);
51965 let e = _mm512_setr_epi32(8, 8, 1, 1, 16, 16, 9, 9, 8, 8, 1, 1, 16, 16, 9, 9);
51966 assert_eq_m512i(r, e);
51967 }
51968
51969 #[simd_test(enable = "avx512f")]
51970 unsafe fn test_mm512_mask_shuffle_epi32() {
51971 let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
51972 let r = _mm512_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0, a);
51973 assert_eq_m512i(r, a);
51974 let r = _mm512_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0b11111111_11111111, a);
51975 let e = _mm512_setr_epi32(8, 8, 1, 1, 16, 16, 9, 9, 8, 8, 1, 1, 16, 16, 9, 9);
51976 assert_eq_m512i(r, e);
51977 }
51978
51979 #[simd_test(enable = "avx512f")]
51980 unsafe fn test_mm512_maskz_shuffle_epi32() {
51981 let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
51982 let r = _mm512_maskz_shuffle_epi32::<_MM_PERM_AADD>(0, a);
51983 assert_eq_m512i(r, _mm512_setzero_si512());
51984 let r = _mm512_maskz_shuffle_epi32::<_MM_PERM_AADD>(0b00000000_11111111, a);
51985 let e = _mm512_setr_epi32(8, 8, 1, 1, 16, 16, 9, 9, 0, 0, 0, 0, 0, 0, 0, 0);
51986 assert_eq_m512i(r, e);
51987 }
51988
51989 #[simd_test(enable = "avx512f,avx512vl")]
51990 unsafe fn test_mm256_mask_shuffle_epi32() {
51991 let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
51992 let r = _mm256_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0, a);
51993 assert_eq_m256i(r, a);
51994 let r = _mm256_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0b11111111, a);
51995 let e = _mm256_set_epi32(8, 8, 1, 1, 16, 16, 9, 9);
51996 assert_eq_m256i(r, e);
51997 }
51998
51999 #[simd_test(enable = "avx512f,avx512vl")]
52000 unsafe fn test_mm256_maskz_shuffle_epi32() {
52001 let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
52002 let r = _mm256_maskz_shuffle_epi32::<_MM_PERM_AADD>(0, a);
52003 assert_eq_m256i(r, _mm256_setzero_si256());
52004 let r = _mm256_maskz_shuffle_epi32::<_MM_PERM_AADD>(0b11111111, a);
52005 let e = _mm256_set_epi32(8, 8, 1, 1, 16, 16, 9, 9);
52006 assert_eq_m256i(r, e);
52007 }
52008
52009 #[simd_test(enable = "avx512f,avx512vl")]
52010 unsafe fn test_mm_mask_shuffle_epi32() {
52011 let a = _mm_set_epi32(1, 4, 5, 8);
52012 let r = _mm_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0, a);
52013 assert_eq_m128i(r, a);
52014 let r = _mm_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0b00001111, a);
52015 let e = _mm_set_epi32(8, 8, 1, 1);
52016 assert_eq_m128i(r, e);
52017 }
52018
52019 #[simd_test(enable = "avx512f,avx512vl")]
52020 unsafe fn test_mm_maskz_shuffle_epi32() {
52021 let a = _mm_set_epi32(1, 4, 5, 8);
52022 let r = _mm_maskz_shuffle_epi32::<_MM_PERM_AADD>(0, a);
52023 assert_eq_m128i(r, _mm_setzero_si128());
52024 let r = _mm_maskz_shuffle_epi32::<_MM_PERM_AADD>(0b00001111, a);
52025 let e = _mm_set_epi32(8, 8, 1, 1);
52026 assert_eq_m128i(r, e);
52027 }
52028
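    // shuffle_ps with control 0b00_00_11_11: per 128-bit lane, the two low results come from
    // element 3 of `a` and the two high results from element 0 of `b`.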
52029 #[simd_test(enable = "avx512f")]
52030 unsafe fn test_mm512_shuffle_ps() {
52031 let a = _mm512_setr_ps(
52032 1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
52033 );
52034 let b = _mm512_setr_ps(
52035 2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
52036 );
52037 let r = _mm512_shuffle_ps::<0b00_00_11_11>(a, b);
52038 let e = _mm512_setr_ps(
52039 8., 8., 2., 2., 16., 16., 10., 10., 8., 8., 2., 2., 16., 16., 10., 10.,
52040 );
52041 assert_eq_m512(r, e);
52042 }
52043
52044 #[simd_test(enable = "avx512f")]
52045 unsafe fn test_mm512_mask_shuffle_ps() {
52046 let a = _mm512_setr_ps(
52047 1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
52048 );
52049 let b = _mm512_setr_ps(
52050 2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
52051 );
52052 let r = _mm512_mask_shuffle_ps::<0b00_00_11_11>(a, 0, a, b);
52053 assert_eq_m512(r, a);
52054 let r = _mm512_mask_shuffle_ps::<0b00_00_11_11>(a, 0b11111111_11111111, a, b);
52055 let e = _mm512_setr_ps(
52056 8., 8., 2., 2., 16., 16., 10., 10., 8., 8., 2., 2., 16., 16., 10., 10.,
52057 );
52058 assert_eq_m512(r, e);
52059 }
52060
52061 #[simd_test(enable = "avx512f")]
52062 unsafe fn test_mm512_maskz_shuffle_ps() {
52063 let a = _mm512_setr_ps(
52064 1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
52065 );
52066 let b = _mm512_setr_ps(
52067 2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
52068 );
52069 let r = _mm512_maskz_shuffle_ps::<0b00_00_11_11>(0, a, b);
52070 assert_eq_m512(r, _mm512_setzero_ps());
52071 let r = _mm512_maskz_shuffle_ps::<0b00_00_11_11>(0b00000000_11111111, a, b);
52072 let e = _mm512_setr_ps(
52073 8., 8., 2., 2., 16., 16., 10., 10., 0., 0., 0., 0., 0., 0., 0., 0.,
52074 );
52075 assert_eq_m512(r, e);
52076 }
52077
52078 #[simd_test(enable = "avx512f,avx512vl")]
52079 unsafe fn test_mm256_mask_shuffle_ps() {
52080 let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
52081 let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
52082 let r = _mm256_mask_shuffle_ps::<0b11_11_11_11>(a, 0, a, b);
52083 assert_eq_m256(r, a);
52084 let r = _mm256_mask_shuffle_ps::<0b00_00_11_11>(a, 0b11111111, a, b);
52085 let e = _mm256_set_ps(7., 7., 1., 1., 15., 15., 9., 9.);
52086 assert_eq_m256(r, e);
52087 }
52088
52089 #[simd_test(enable = "avx512f,avx512vl")]
52090 unsafe fn test_mm256_maskz_shuffle_ps() {
52091 let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
52092 let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
52093 let r = _mm256_maskz_shuffle_ps::<0b11_11_11_11>(0, a, b);
52094 assert_eq_m256(r, _mm256_setzero_ps());
52095 let r = _mm256_maskz_shuffle_ps::<0b00_00_11_11>(0b11111111, a, b);
52096 let e = _mm256_set_ps(7., 7., 1., 1., 15., 15., 9., 9.);
52097 assert_eq_m256(r, e);
52098 }
52099
52100 #[simd_test(enable = "avx512f,avx512vl")]
52101 unsafe fn test_mm_mask_shuffle_ps() {
52102 let a = _mm_set_ps(1., 4., 5., 8.);
52103 let b = _mm_set_ps(2., 3., 6., 7.);
52104 let r = _mm_mask_shuffle_ps::<0b11_11_11_11>(a, 0, a, b);
52105 assert_eq_m128(r, a);
52106 let r = _mm_mask_shuffle_ps::<0b00_00_11_11>(a, 0b00001111, a, b);
52107 let e = _mm_set_ps(7., 7., 1., 1.);
52108 assert_eq_m128(r, e);
52109 }
52110
52111 #[simd_test(enable = "avx512f,avx512vl")]
52112 unsafe fn test_mm_maskz_shuffle_ps() {
52113 let a = _mm_set_ps(1., 4., 5., 8.);
52114 let b = _mm_set_ps(2., 3., 6., 7.);
52115 let r = _mm_maskz_shuffle_ps::<0b11_11_11_11>(0, a, b);
52116 assert_eq_m128(r, _mm_setzero_ps());
52117 let r = _mm_maskz_shuffle_ps::<0b00_00_11_11>(0b00001111, a, b);
52118 let e = _mm_set_ps(7., 7., 1., 1.);
52119 assert_eq_m128(r, e);
52120 }
52121
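    // shuffle_i32x4 builds the two low 128-bit lanes of the result from `a` and the two high
    // lanes from `b`; each 2-bit control field selects a source lane, so 0b00_00_00_00
    // replicates lane 0 of each source.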
52122 #[simd_test(enable = "avx512f")]
52123 unsafe fn test_mm512_shuffle_i32x4() {
52124 let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
52125 let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15);
52126 let r = _mm512_shuffle_i32x4::<0b00_00_00_00>(a, b);
52127 let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 2, 3, 6, 7, 2, 3, 6, 7);
52128 assert_eq_m512i(r, e);
52129 }
52130
52131 #[simd_test(enable = "avx512f")]
52132 unsafe fn test_mm512_mask_shuffle_i32x4() {
52133 let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
52134 let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15);
52135 let r = _mm512_mask_shuffle_i32x4::<0b00_00_00_00>(a, 0, a, b);
52136 assert_eq_m512i(r, a);
52137 let r = _mm512_mask_shuffle_i32x4::<0b00_00_00_00>(a, 0b11111111_11111111, a, b);
52138 let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 2, 3, 6, 7, 2, 3, 6, 7);
52139 assert_eq_m512i(r, e);
52140 }
52141
52142 #[simd_test(enable = "avx512f")]
52143 unsafe fn test_mm512_maskz_shuffle_i32x4() {
52144 let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
52145 let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15);
52146 let r = _mm512_maskz_shuffle_i32x4::<0b00_00_00_00>(0, a, b);
52147 assert_eq_m512i(r, _mm512_setzero_si512());
52148 let r = _mm512_maskz_shuffle_i32x4::<0b00_00_00_00>(0b00000000_11111111, a, b);
52149 let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 0, 0, 0, 0, 0, 0, 0, 0);
52150 assert_eq_m512i(r, e);
52151 }
52152
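    // For the 256-bit form there is one control bit per result lane: bit 0 selects the lane
    // of `a` for the low result lane and bit 1 the lane of `b` for the high result lane.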
52153 #[simd_test(enable = "avx512f,avx512vl")]
52154 unsafe fn test_mm256_shuffle_i32x4() {
52155 let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
52156 let b = _mm256_set_epi32(2, 3, 6, 7, 10, 11, 14, 15);
52157 let r = _mm256_shuffle_i32x4::<0b00>(a, b);
52158 let e = _mm256_set_epi32(10, 11, 14, 15, 9, 12, 13, 16);
52159 assert_eq_m256i(r, e);
52160 }
52161
52162 #[simd_test(enable = "avx512f,avx512vl")]
52163 unsafe fn test_mm256_mask_shuffle_i32x4() {
52164 let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
52165 let b = _mm256_set_epi32(2, 3, 6, 7, 10, 11, 14, 15);
52166 let r = _mm256_mask_shuffle_i32x4::<0b00>(a, 0, a, b);
52167 assert_eq_m256i(r, a);
52168 let r = _mm256_mask_shuffle_i32x4::<0b00>(a, 0b11111111, a, b);
52169 let e = _mm256_set_epi32(10, 11, 14, 15, 9, 12, 13, 16);
52170 assert_eq_m256i(r, e);
52171 }
52172
52173 #[simd_test(enable = "avx512f,avx512vl")]
52174 unsafe fn test_mm256_maskz_shuffle_i32x4() {
52175 let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
52176 let b = _mm256_set_epi32(2, 3, 6, 7, 10, 11, 14, 15);
52177 let r = _mm256_maskz_shuffle_i32x4::<0b00>(0, a, b);
52178 assert_eq_m256i(r, _mm256_setzero_si256());
52179 let r = _mm256_maskz_shuffle_i32x4::<0b00>(0b11111111, a, b);
52180 let e = _mm256_set_epi32(10, 11, 14, 15, 9, 12, 13, 16);
52181 assert_eq_m256i(r, e);
52182 }
52183
52184 #[simd_test(enable = "avx512f")]
52185 unsafe fn test_mm512_shuffle_f32x4() {
52186 let a = _mm512_setr_ps(
52187 1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
52188 );
52189 let b = _mm512_setr_ps(
52190 2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
52191 );
52192 let r = _mm512_shuffle_f32x4::<0b00_00_00_00>(a, b);
52193 let e = _mm512_setr_ps(
52194 1., 4., 5., 8., 1., 4., 5., 8., 2., 3., 6., 7., 2., 3., 6., 7.,
52195 );
52196 assert_eq_m512(r, e);
52197 }
52198
52199 #[simd_test(enable = "avx512f")]
52200 unsafe fn test_mm512_mask_shuffle_f32x4() {
52201 let a = _mm512_setr_ps(
52202 1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
52203 );
52204 let b = _mm512_setr_ps(
52205 2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
52206 );
52207 let r = _mm512_mask_shuffle_f32x4::<0b00_00_00_00>(a, 0, a, b);
52208 assert_eq_m512(r, a);
52209 let r = _mm512_mask_shuffle_f32x4::<0b00_00_00_00>(a, 0b11111111_11111111, a, b);
52210 let e = _mm512_setr_ps(
52211 1., 4., 5., 8., 1., 4., 5., 8., 2., 3., 6., 7., 2., 3., 6., 7.,
52212 );
52213 assert_eq_m512(r, e);
52214 }
52215
52216 #[simd_test(enable = "avx512f")]
52217 unsafe fn test_mm512_maskz_shuffle_f32x4() {
52218 let a = _mm512_setr_ps(
52219 1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
52220 );
52221 let b = _mm512_setr_ps(
52222 2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
52223 );
52224 let r = _mm512_maskz_shuffle_f32x4::<0b00_00_00_00>(0, a, b);
52225 assert_eq_m512(r, _mm512_setzero_ps());
52226 let r = _mm512_maskz_shuffle_f32x4::<0b00_00_00_00>(0b00000000_11111111, a, b);
52227 let e = _mm512_setr_ps(
52228 1., 4., 5., 8., 1., 4., 5., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
52229 );
52230 assert_eq_m512(r, e);
52231 }
52232
52233 #[simd_test(enable = "avx512f,avx512vl")]
52234 unsafe fn test_mm256_shuffle_f32x4() {
52235 let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
52236 let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
52237 let r = _mm256_shuffle_f32x4::<0b00>(a, b);
52238 let e = _mm256_set_ps(10., 11., 14., 15., 9., 12., 13., 16.);
52239 assert_eq_m256(r, e);
52240 }
52241
52242 #[simd_test(enable = "avx512f,avx512vl")]
52243 unsafe fn test_mm256_mask_shuffle_f32x4() {
52244 let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
52245 let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
52246 let r = _mm256_mask_shuffle_f32x4::<0b00>(a, 0, a, b);
52247 assert_eq_m256(r, a);
52248 let r = _mm256_mask_shuffle_f32x4::<0b00>(a, 0b11111111, a, b);
52249 let e = _mm256_set_ps(10., 11., 14., 15., 9., 12., 13., 16.);
52250 assert_eq_m256(r, e);
52251 }
52252
52253 #[simd_test(enable = "avx512f,avx512vl")]
52254 unsafe fn test_mm256_maskz_shuffle_f32x4() {
52255 let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
52256 let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
52257 let r = _mm256_maskz_shuffle_f32x4::<0b00>(0, a, b);
52258 assert_eq_m256(r, _mm256_setzero_ps());
52259 let r = _mm256_maskz_shuffle_f32x4::<0b00>(0b11111111, a, b);
52260 let e = _mm256_set_ps(10., 11., 14., 15., 9., 12., 13., 16.);
52261 assert_eq_m256(r, e);
52262 }
52263
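    // Index 1 selects the second 128-bit lane, i.e. elements 4 through 7 of the `setr` input.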
52264 #[simd_test(enable = "avx512f")]
52265 unsafe fn test_mm512_extractf32x4_ps() {
52266 let a = _mm512_setr_ps(
52267 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
52268 );
52269 let r = _mm512_extractf32x4_ps::<1>(a);
52270 let e = _mm_setr_ps(5., 6., 7., 8.);
52271 assert_eq_m128(r, e);
52272 }
52273
52274 #[simd_test(enable = "avx512f")]
52275 unsafe fn test_mm512_mask_extractf32x4_ps() {
52276 let a = _mm512_setr_ps(
52277 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
52278 );
52279 let src = _mm_set1_ps(100.);
52280 let r = _mm512_mask_extractf32x4_ps::<1>(src, 0, a);
52281 assert_eq_m128(r, src);
52282 let r = _mm512_mask_extractf32x4_ps::<1>(src, 0b11111111, a);
52283 let e = _mm_setr_ps(5., 6., 7., 8.);
52284 assert_eq_m128(r, e);
52285 }
52286
52287 #[simd_test(enable = "avx512f")]
52288 unsafe fn test_mm512_maskz_extractf32x4_ps() {
52289 let a = _mm512_setr_ps(
52290 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
52291 );
52292 let r = _mm512_maskz_extractf32x4_ps::<1>(0, a);
52293 assert_eq_m128(r, _mm_setzero_ps());
52294 let r = _mm512_maskz_extractf32x4_ps::<1>(0b00000001, a);
52295 let e = _mm_setr_ps(5., 0., 0., 0.);
52296 assert_eq_m128(r, e);
52297 }
52298
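    // `_mm256_set_ps` lists elements from high to low, so lane 1 of `a` holds 4., 3., 2., 1.
    // in memory order, which is exactly what `_mm_set_ps(1., 2., 3., 4.)` encodes.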
52299 #[simd_test(enable = "avx512f,avx512vl")]
52300 unsafe fn test_mm256_extractf32x4_ps() {
52301 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
52302 let r = _mm256_extractf32x4_ps::<1>(a);
52303 let e = _mm_set_ps(1., 2., 3., 4.);
52304 assert_eq_m128(r, e);
52305 }
52306
52307 #[simd_test(enable = "avx512f,avx512vl")]
52308 unsafe fn test_mm256_mask_extractf32x4_ps() {
52309 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
52310 let src = _mm_set1_ps(100.);
52311 let r = _mm256_mask_extractf32x4_ps::<1>(src, 0, a);
52312 assert_eq_m128(r, src);
52313 let r = _mm256_mask_extractf32x4_ps::<1>(src, 0b00001111, a);
52314 let e = _mm_set_ps(1., 2., 3., 4.);
52315 assert_eq_m128(r, e);
52316 }
52317
52318 #[simd_test(enable = "avx512f,avx512vl")]
52319 unsafe fn test_mm256_maskz_extractf32x4_ps() {
52320 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
52321 let r = _mm256_maskz_extractf32x4_ps::<1>(0, a);
52322 assert_eq_m128(r, _mm_setzero_ps());
52323 let r = _mm256_maskz_extractf32x4_ps::<1>(0b00001111, a);
52324 let e = _mm_set_ps(1., 2., 3., 4.);
52325 assert_eq_m128(r, e);
52326 }
52327
52328 #[simd_test(enable = "avx512f")]
52329 unsafe fn test_mm512_extracti32x4_epi32() {
52330 let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
52331 let r = _mm512_extracti32x4_epi32::<1>(a);
52332 let e = _mm_setr_epi32(5, 6, 7, 8);
52333 assert_eq_m128i(r, e);
52334 }
52335
52336 #[simd_test(enable = "avx512f")]
52337 unsafe fn test_mm512_mask_extracti32x4_epi32() {
52338 let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
52339 let src = _mm_set1_epi32(100);
52340 let r = _mm512_mask_extracti32x4_epi32::<1>(src, 0, a);
52341 assert_eq_m128i(r, src);
52342 let r = _mm512_mask_extracti32x4_epi32::<1>(src, 0b11111111, a);
52343 let e = _mm_setr_epi32(5, 6, 7, 8);
52344 assert_eq_m128i(r, e);
52345 }
52346
52347 #[simd_test(enable = "avx512f")]
52348 unsafe fn test_mm512_maskz_extracti32x4_epi32() {
52349 let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
52350 let r = _mm512_maskz_extracti32x4_epi32::<1>(0, a);
52351 assert_eq_m128i(r, _mm_setzero_si128());
52352 let r = _mm512_maskz_extracti32x4_epi32::<1>(0b00000001, a);
52353 let e = _mm_setr_epi32(5, 0, 0, 0);
52354 assert_eq_m128i(r, e);
52355 }
52356
52357 #[simd_test(enable = "avx512f,avx512vl")]
52358 unsafe fn test_mm256_extracti32x4_epi32() {
52359 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
52360 let r = _mm256_extracti32x4_epi32::<1>(a);
52361 let e = _mm_set_epi32(1, 2, 3, 4);
52362 assert_eq_m128i(r, e);
52363 }
52364
52365 #[simd_test(enable = "avx512f,avx512vl")]
52366 unsafe fn test_mm256_mask_extracti32x4_epi32() {
52367 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
52368 let src = _mm_set1_epi32(100);
52369 let r = _mm256_mask_extracti32x4_epi32::<1>(src, 0, a);
52370 assert_eq_m128i(r, src);
52371 let r = _mm256_mask_extracti32x4_epi32::<1>(src, 0b00001111, a);
52372 let e = _mm_set_epi32(1, 2, 3, 4);
52373 assert_eq_m128i(r, e);
52374 }
52375
52376 #[simd_test(enable = "avx512f,avx512vl")]
52377 unsafe fn test_mm256_maskz_extracti32x4_epi32() {
52378 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
52379 let r = _mm256_maskz_extracti32x4_epi32::<1>(0, a);
52380 assert_eq_m128i(r, _mm_setzero_si128());
52381 let r = _mm256_maskz_extracti32x4_epi32::<1>(0b00001111, a);
52382 let e = _mm_set_epi32(1, 2, 3, 4);
52383 assert_eq_m128i(r, e);
52384 }
52385
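    // moveldup duplicates the even-indexed element of each pair (element 0 into 0 and 1,
    // element 2 into 2 and 3, and so on).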
52386 #[simd_test(enable = "avx512f")]
52387 unsafe fn test_mm512_moveldup_ps() {
52388 let a = _mm512_setr_ps(
52389 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
52390 );
52391 let r = _mm512_moveldup_ps(a);
52392 let e = _mm512_setr_ps(
52393 1., 1., 3., 3., 5., 5., 7., 7., 9., 9., 11., 11., 13., 13., 15., 15.,
52394 );
52395 assert_eq_m512(r, e);
52396 }
52397
52398 #[simd_test(enable = "avx512f")]
52399 unsafe fn test_mm512_mask_moveldup_ps() {
52400 let a = _mm512_setr_ps(
52401 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
52402 );
52403 let r = _mm512_mask_moveldup_ps(a, 0, a);
52404 assert_eq_m512(r, a);
52405 let r = _mm512_mask_moveldup_ps(a, 0b11111111_11111111, a);
52406 let e = _mm512_setr_ps(
52407 1., 1., 3., 3., 5., 5., 7., 7., 9., 9., 11., 11., 13., 13., 15., 15.,
52408 );
52409 assert_eq_m512(r, e);
52410 }
52411
52412 #[simd_test(enable = "avx512f")]
52413 unsafe fn test_mm512_maskz_moveldup_ps() {
52414 let a = _mm512_setr_ps(
52415 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
52416 );
52417 let r = _mm512_maskz_moveldup_ps(0, a);
52418 assert_eq_m512(r, _mm512_setzero_ps());
52419 let r = _mm512_maskz_moveldup_ps(0b00000000_11111111, a);
52420 let e = _mm512_setr_ps(
52421 1., 1., 3., 3., 5., 5., 7., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
52422 );
52423 assert_eq_m512(r, e);
52424 }
52425
52426 #[simd_test(enable = "avx512f,avx512vl")]
52427 unsafe fn test_mm256_mask_moveldup_ps() {
52428 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
52429 let r = _mm256_mask_moveldup_ps(a, 0, a);
52430 assert_eq_m256(r, a);
52431 let r = _mm256_mask_moveldup_ps(a, 0b11111111, a);
52432 let e = _mm256_set_ps(2., 2., 4., 4., 6., 6., 8., 8.);
52433 assert_eq_m256(r, e);
52434 }
52435
52436 #[simd_test(enable = "avx512f,avx512vl")]
52437 unsafe fn test_mm256_maskz_moveldup_ps() {
52438 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
52439 let r = _mm256_maskz_moveldup_ps(0, a);
52440 assert_eq_m256(r, _mm256_setzero_ps());
52441 let r = _mm256_maskz_moveldup_ps(0b11111111, a);
52442 let e = _mm256_set_ps(2., 2., 4., 4., 6., 6., 8., 8.);
52443 assert_eq_m256(r, e);
52444 }
52445
52446 #[simd_test(enable = "avx512f,avx512vl")]
52447 unsafe fn test_mm_mask_moveldup_ps() {
52448 let a = _mm_set_ps(1., 2., 3., 4.);
52449 let r = _mm_mask_moveldup_ps(a, 0, a);
52450 assert_eq_m128(r, a);
52451 let r = _mm_mask_moveldup_ps(a, 0b00001111, a);
52452 let e = _mm_set_ps(2., 2., 4., 4.);
52453 assert_eq_m128(r, e);
52454 }
52455
52456 #[simd_test(enable = "avx512f,avx512vl")]
52457 unsafe fn test_mm_maskz_moveldup_ps() {
52458 let a = _mm_set_ps(1., 2., 3., 4.);
52459 let r = _mm_maskz_moveldup_ps(0, a);
52460 assert_eq_m128(r, _mm_setzero_ps());
52461 let r = _mm_maskz_moveldup_ps(0b00001111, a);
52462 let e = _mm_set_ps(2., 2., 4., 4.);
52463 assert_eq_m128(r, e);
52464 }
52465
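    // movehdup is the counterpart: the odd-indexed element of each pair is duplicated.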
52466 #[simd_test(enable = "avx512f")]
52467 unsafe fn test_mm512_movehdup_ps() {
52468 let a = _mm512_setr_ps(
52469 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
52470 );
52471 let r = _mm512_movehdup_ps(a);
52472 let e = _mm512_setr_ps(
52473 2., 2., 4., 4., 6., 6., 8., 8., 10., 10., 12., 12., 14., 14., 16., 16.,
52474 );
52475 assert_eq_m512(r, e);
52476 }
52477
52478 #[simd_test(enable = "avx512f")]
52479 unsafe fn test_mm512_mask_movehdup_ps() {
52480 let a = _mm512_setr_ps(
52481 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
52482 );
52483 let r = _mm512_mask_movehdup_ps(a, 0, a);
52484 assert_eq_m512(r, a);
52485 let r = _mm512_mask_movehdup_ps(a, 0b11111111_11111111, a);
52486 let e = _mm512_setr_ps(
52487 2., 2., 4., 4., 6., 6., 8., 8., 10., 10., 12., 12., 14., 14., 16., 16.,
52488 );
52489 assert_eq_m512(r, e);
52490 }
52491
52492 #[simd_test(enable = "avx512f")]
52493 unsafe fn test_mm512_maskz_movehdup_ps() {
52494 let a = _mm512_setr_ps(
52495 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
52496 );
52497 let r = _mm512_maskz_movehdup_ps(0, a);
52498 assert_eq_m512(r, _mm512_setzero_ps());
52499 let r = _mm512_maskz_movehdup_ps(0b00000000_11111111, a);
52500 let e = _mm512_setr_ps(
52501 2., 2., 4., 4., 6., 6., 8., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
52502 );
52503 assert_eq_m512(r, e);
52504 }
52505
52506 #[simd_test(enable = "avx512f,avx512vl")]
52507 unsafe fn test_mm256_mask_movehdup_ps() {
52508 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
52509 let r = _mm256_mask_movehdup_ps(a, 0, a);
52510 assert_eq_m256(r, a);
52511 let r = _mm256_mask_movehdup_ps(a, 0b11111111, a);
52512 let e = _mm256_set_ps(1., 1., 3., 3., 5., 5., 7., 7.);
52513 assert_eq_m256(r, e);
52514 }
52515
52516 #[simd_test(enable = "avx512f,avx512vl")]
52517 unsafe fn test_mm256_maskz_movehdup_ps() {
52518 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
52519 let r = _mm256_maskz_movehdup_ps(0, a);
52520 assert_eq_m256(r, _mm256_setzero_ps());
52521 let r = _mm256_maskz_movehdup_ps(0b11111111, a);
52522 let e = _mm256_set_ps(1., 1., 3., 3., 5., 5., 7., 7.);
52523 assert_eq_m256(r, e);
52524 }
52525
52526 #[simd_test(enable = "avx512f,avx512vl")]
52527 unsafe fn test_mm_mask_movehdup_ps() {
52528 let a = _mm_set_ps(1., 2., 3., 4.);
52529 let r = _mm_mask_movehdup_ps(a, 0, a);
52530 assert_eq_m128(r, a);
52531 let r = _mm_mask_movehdup_ps(a, 0b00001111, a);
52532 let e = _mm_set_ps(1., 1., 3., 3.);
52533 assert_eq_m128(r, e);
52534 }
52535
52536 #[simd_test(enable = "avx512f,avx512vl")]
52537 unsafe fn test_mm_maskz_movehdup_ps() {
52538 let a = _mm_set_ps(1., 2., 3., 4.);
52539 let r = _mm_maskz_movehdup_ps(0, a);
52540 assert_eq_m128(r, _mm_setzero_ps());
52541 let r = _mm_maskz_movehdup_ps(0b00001111, a);
52542 let e = _mm_set_ps(1., 1., 3., 3.);
52543 assert_eq_m128(r, e);
52544 }
52545
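    // inserti32x4 replaces the 128-bit lane selected by the immediate with `b` and copies
    // the remaining lanes from `a`.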
52546 #[simd_test(enable = "avx512f")]
52547 unsafe fn test_mm512_inserti32x4() {
52548 let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
52549 let b = _mm_setr_epi32(17, 18, 19, 20);
52550 let r = _mm512_inserti32x4::<0>(a, b);
52551 let e = _mm512_setr_epi32(17, 18, 19, 20, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
52552 assert_eq_m512i(r, e);
52553 }
52554
52555 #[simd_test(enable = "avx512f")]
52556 unsafe fn test_mm512_mask_inserti32x4() {
52557 let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
52558 let b = _mm_setr_epi32(17, 18, 19, 20);
52559 let r = _mm512_mask_inserti32x4::<0>(a, 0, a, b);
52560 assert_eq_m512i(r, a);
52561 let r = _mm512_mask_inserti32x4::<0>(a, 0b11111111_11111111, a, b);
52562 let e = _mm512_setr_epi32(17, 18, 19, 20, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
52563 assert_eq_m512i(r, e);
52564 }
52565
52566 #[simd_test(enable = "avx512f")]
52567 unsafe fn test_mm512_maskz_inserti32x4() {
52568 let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
52569 let b = _mm_setr_epi32(17, 18, 19, 20);
52570 let r = _mm512_maskz_inserti32x4::<0>(0, a, b);
52571 assert_eq_m512i(r, _mm512_setzero_si512());
52572 let r = _mm512_maskz_inserti32x4::<0>(0b00000000_11111111, a, b);
52573 let e = _mm512_setr_epi32(17, 18, 19, 20, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0);
52574 assert_eq_m512i(r, e);
52575 }
52576
52577 #[simd_test(enable = "avx512f,avx512vl")]
52578 unsafe fn test_mm256_inserti32x4() {
52579 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
52580 let b = _mm_set_epi32(17, 18, 19, 20);
52581 let r = _mm256_inserti32x4::<1>(a, b);
52582 let e = _mm256_set_epi32(17, 18, 19, 20, 5, 6, 7, 8);
52583 assert_eq_m256i(r, e);
52584 }
52585
52586 #[simd_test(enable = "avx512f,avx512vl")]
52587 unsafe fn test_mm256_mask_inserti32x4() {
52588 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
52589 let b = _mm_set_epi32(17, 18, 19, 20);
52590 let r = _mm256_mask_inserti32x4::<0>(a, 0, a, b);
52591 assert_eq_m256i(r, a);
52592 let r = _mm256_mask_inserti32x4::<1>(a, 0b11111111, a, b);
52593 let e = _mm256_set_epi32(17, 18, 19, 20, 5, 6, 7, 8);
52594 assert_eq_m256i(r, e);
52595 }
52596
52597 #[simd_test(enable = "avx512f,avx512vl")]
52598 unsafe fn test_mm256_maskz_inserti32x4() {
52599 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
52600 let b = _mm_set_epi32(17, 18, 19, 20);
52601 let r = _mm256_maskz_inserti32x4::<0>(0, a, b);
52602 assert_eq_m256i(r, _mm256_setzero_si256());
52603 let r = _mm256_maskz_inserti32x4::<1>(0b11111111, a, b);
52604 let e = _mm256_set_epi32(17, 18, 19, 20, 5, 6, 7, 8);
52605 assert_eq_m256i(r, e);
52606 }
52607
52608 #[simd_test(enable = "avx512f")]
52609 unsafe fn test_mm512_insertf32x4() {
52610 let a = _mm512_setr_ps(
52611 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
52612 );
52613 let b = _mm_setr_ps(17., 18., 19., 20.);
52614 let r = _mm512_insertf32x4::<0>(a, b);
52615 let e = _mm512_setr_ps(
52616 17., 18., 19., 20., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
52617 );
52618 assert_eq_m512(r, e);
52619 }
52620
52621 #[simd_test(enable = "avx512f")]
52622 unsafe fn test_mm512_mask_insertf32x4() {
52623 let a = _mm512_setr_ps(
52624 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
52625 );
52626 let b = _mm_setr_ps(17., 18., 19., 20.);
52627 let r = _mm512_mask_insertf32x4::<0>(a, 0, a, b);
52628 assert_eq_m512(r, a);
52629 let r = _mm512_mask_insertf32x4::<0>(a, 0b11111111_11111111, a, b);
52630 let e = _mm512_setr_ps(
52631 17., 18., 19., 20., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
52632 );
52633 assert_eq_m512(r, e);
52634 }
52635
52636 #[simd_test(enable = "avx512f")]
52637 unsafe fn test_mm512_maskz_insertf32x4() {
52638 let a = _mm512_setr_ps(
52639 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
52640 );
52641 let b = _mm_setr_ps(17., 18., 19., 20.);
52642 let r = _mm512_maskz_insertf32x4::<0>(0, a, b);
52643 assert_eq_m512(r, _mm512_setzero_ps());
52644 let r = _mm512_maskz_insertf32x4::<0>(0b00000000_11111111, a, b);
52645 let e = _mm512_setr_ps(
52646 17., 18., 19., 20., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
52647 );
52648 assert_eq_m512(r, e);
52649 }
52650
52651 #[simd_test(enable = "avx512f,avx512vl")]
52652 unsafe fn test_mm256_insertf32x4() {
52653 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
52654 let b = _mm_set_ps(17., 18., 19., 20.);
52655 let r = _mm256_insertf32x4::<1>(a, b);
52656 let e = _mm256_set_ps(17., 18., 19., 20., 5., 6., 7., 8.);
52657 assert_eq_m256(r, e);
52658 }
52659
52660 #[simd_test(enable = "avx512f,avx512vl")]
52661 unsafe fn test_mm256_mask_insertf32x4() {
52662 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
52663 let b = _mm_set_ps(17., 18., 19., 20.);
52664 let r = _mm256_mask_insertf32x4::<0>(a, 0, a, b);
52665 assert_eq_m256(r, a);
52666 let r = _mm256_mask_insertf32x4::<1>(a, 0b11111111, a, b);
52667 let e = _mm256_set_ps(17., 18., 19., 20., 5., 6., 7., 8.);
52668 assert_eq_m256(r, e);
52669 }
52670
52671 #[simd_test(enable = "avx512f,avx512vl")]
52672 unsafe fn test_mm256_maskz_insertf32x4() {
52673 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
52674 let b = _mm_set_ps(17., 18., 19., 20.);
52675 let r = _mm256_maskz_insertf32x4::<0>(0, a, b);
52676 assert_eq_m256(r, _mm256_setzero_ps());
52677 let r = _mm256_maskz_insertf32x4::<1>(0b11111111, a, b);
52678 let e = _mm256_set_ps(17., 18., 19., 20., 5., 6., 7., 8.);
52679 assert_eq_m256(r, e);
52680 }
52681
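    // Architecturally the upper elements of a 128->512 cast are undefined; these assertions
    // rely on this implementation filling them with -1.0 (the zext tests below cover the
    // zero-extending variants).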
52682 #[simd_test(enable = "avx512f")]
52683 unsafe fn test_mm512_castps128_ps512() {
52684 let a = _mm_setr_ps(17., 18., 19., 20.);
52685 let r = _mm512_castps128_ps512(a);
52686 let e = _mm512_setr_ps(
52687 17., 18., 19., 20., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
52688 );
52689 assert_eq_m512(r, e);
52690 }
52691
52692 #[simd_test(enable = "avx512f")]
52693 unsafe fn test_mm512_castps256_ps512() {
52694 let a = _mm256_setr_ps(17., 18., 19., 20., 21., 22., 23., 24.);
52695 let r = _mm512_castps256_ps512(a);
52696 let e = _mm512_setr_ps(
52697 17., 18., 19., 20., 21., 22., 23., 24., -1., -1., -1., -1., -1., -1., -1., -1.,
52698 );
52699 assert_eq_m512(r, e);
52700 }
52701
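    // The zext variants, by contrast, guarantee zeroed upper elements.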
52702 #[simd_test(enable = "avx512f")]
52703 unsafe fn test_mm512_zextps128_ps512() {
52704 let a = _mm_setr_ps(17., 18., 19., 20.);
52705 let r = _mm512_zextps128_ps512(a);
52706 let e = _mm512_setr_ps(
52707 17., 18., 19., 20., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
52708 );
52709 assert_eq_m512(r, e);
52710 }
52711
52712 #[simd_test(enable = "avx512f")]
52713 unsafe fn test_mm512_zextps256_ps512() {
52714 let a = _mm256_setr_ps(17., 18., 19., 20., 21., 22., 23., 24.);
52715 let r = _mm512_zextps256_ps512(a);
52716 let e = _mm512_setr_ps(
52717 17., 18., 19., 20., 21., 22., 23., 24., 0., 0., 0., 0., 0., 0., 0., 0.,
52718 );
52719 assert_eq_m512(r, e);
52720 }
52721
52722 #[simd_test(enable = "avx512f")]
52723 unsafe fn test_mm512_castps512_ps128() {
52724 let a = _mm512_setr_ps(
52725 17., 18., 19., 20., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
52726 );
52727 let r = _mm512_castps512_ps128(a);
52728 let e = _mm_setr_ps(17., 18., 19., 20.);
52729 assert_eq_m128(r, e);
52730 }
52731
52732 #[simd_test(enable = "avx512f")]
52733 unsafe fn test_mm512_castps512_ps256() {
52734 let a = _mm512_setr_ps(
52735 17., 18., 19., 20., 21., 22., 23., 24., -1., -1., -1., -1., -1., -1., -1., -1.,
52736 );
52737 let r = _mm512_castps512_ps256(a);
52738 let e = _mm256_setr_ps(17., 18., 19., 20., 21., 22., 23., 24.);
52739 assert_eq_m256(r, e);
52740 }
52741
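    // Bit-cast check: two adjacent f32 1.0 values (0x3F80_0000 each) reinterpret as the f64
    // bit pattern 0x3F80_0000_3F80_0000, the value asserted below.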
52742 #[simd_test(enable = "avx512f")]
52743 unsafe fn test_mm512_castps_pd() {
52744 let a = _mm512_set1_ps(1.);
52745 let r = _mm512_castps_pd(a);
52746 let e = _mm512_set1_pd(0.007812501848093234);
52747 assert_eq_m512d(r, e);
52748 }
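    // Likewise, 1065353216 is 0x3F80_0000, the bit pattern of 1.0f32.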
52749
52750 #[simd_test(enable = "avx512f")]
52751 unsafe fn test_mm512_castps_si512() {
52752 let a = _mm512_set1_ps(1.);
52753 let r = _mm512_castps_si512(a);
52754 let e = _mm512_set1_epi32(1065353216);
52755 assert_eq_m512i(r, e);
52756 }
52757
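    // broadcastd replicates element 0 of `a`; with `_mm_set_epi32(17, 18, 19, 20)` that is
    // the last argument, 20.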
52758 #[simd_test(enable = "avx512f")]
52759 unsafe fn test_mm512_broadcastd_epi32() {
52760 let a = _mm_set_epi32(17, 18, 19, 20);
52761 let r = _mm512_broadcastd_epi32(a);
52762 let e = _mm512_set1_epi32(20);
52763 assert_eq_m512i(r, e);
52764 }
52765
52766 #[simd_test(enable = "avx512f")]
52767 unsafe fn test_mm512_mask_broadcastd_epi32() {
52768 let src = _mm512_set1_epi32(20);
52769 let a = _mm_set_epi32(17, 18, 19, 20);
52770 let r = _mm512_mask_broadcastd_epi32(src, 0, a);
52771 assert_eq_m512i(r, src);
52772 let r = _mm512_mask_broadcastd_epi32(src, 0b11111111_11111111, a);
52773 let e = _mm512_set1_epi32(20);
52774 assert_eq_m512i(r, e);
52775 }
52776
52777 #[simd_test(enable = "avx512f")]
52778 unsafe fn test_mm512_maskz_broadcastd_epi32() {
52779 let a = _mm_set_epi32(17, 18, 19, 20);
52780 let r = _mm512_maskz_broadcastd_epi32(0, a);
52781 assert_eq_m512i(r, _mm512_setzero_si512());
52782 let r = _mm512_maskz_broadcastd_epi32(0b00000000_11111111, a);
52783 let e = _mm512_setr_epi32(20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 0, 0, 0, 0);
52784 assert_eq_m512i(r, e);
52785 }
52786
52787 #[simd_test(enable = "avx512f,avx512vl")]
52788 unsafe fn test_mm256_mask_broadcastd_epi32() {
52789 let src = _mm256_set1_epi32(20);
52790 let a = _mm_set_epi32(17, 18, 19, 20);
52791 let r = _mm256_mask_broadcastd_epi32(src, 0, a);
52792 assert_eq_m256i(r, src);
52793 let r = _mm256_mask_broadcastd_epi32(src, 0b11111111, a);
52794 let e = _mm256_set1_epi32(20);
52795 assert_eq_m256i(r, e);
52796 }
52797
52798 #[simd_test(enable = "avx512f,avx512vl")]
52799 unsafe fn test_mm256_maskz_broadcastd_epi32() {
52800 let a = _mm_set_epi32(17, 18, 19, 20);
52801 let r = _mm256_maskz_broadcastd_epi32(0, a);
52802 assert_eq_m256i(r, _mm256_setzero_si256());
52803 let r = _mm256_maskz_broadcastd_epi32(0b11111111, a);
52804 let e = _mm256_set1_epi32(20);
52805 assert_eq_m256i(r, e);
52806 }
52807
52808 #[simd_test(enable = "avx512f,avx512vl")]
52809 unsafe fn test_mm_mask_broadcastd_epi32() {
52810 let src = _mm_set1_epi32(20);
52811 let a = _mm_set_epi32(17, 18, 19, 20);
52812 let r = _mm_mask_broadcastd_epi32(src, 0, a);
52813 assert_eq_m128i(r, src);
52814 let r = _mm_mask_broadcastd_epi32(src, 0b00001111, a);
52815 let e = _mm_set1_epi32(20);
52816 assert_eq_m128i(r, e);
52817 }
52818
52819 #[simd_test(enable = "avx512f,avx512vl")]
52820 unsafe fn test_mm_maskz_broadcastd_epi32() {
52821 let a = _mm_set_epi32(17, 18, 19, 20);
52822 let r = _mm_maskz_broadcastd_epi32(0, a);
52823 assert_eq_m128i(r, _mm_setzero_si128());
52824 let r = _mm_maskz_broadcastd_epi32(0b00001111, a);
52825 let e = _mm_set1_epi32(20);
52826 assert_eq_m128i(r, e);
52827 }
52828
52829 #[simd_test(enable = "avx512f")]
52830 unsafe fn test_mm512_broadcastss_ps() {
52831 let a = _mm_set_ps(17., 18., 19., 20.);
52832 let r = _mm512_broadcastss_ps(a);
52833 let e = _mm512_set1_ps(20.);
52834 assert_eq_m512(r, e);
52835 }
52836
52837 #[simd_test(enable = "avx512f")]
52838 unsafe fn test_mm512_mask_broadcastss_ps() {
52839 let src = _mm512_set1_ps(20.);
52840 let a = _mm_set_ps(17., 18., 19., 20.);
52841 let r = _mm512_mask_broadcastss_ps(src, 0, a);
52842 assert_eq_m512(r, src);
52843 let r = _mm512_mask_broadcastss_ps(src, 0b11111111_11111111, a);
52844 let e = _mm512_set1_ps(20.);
52845 assert_eq_m512(r, e);
52846 }
52847
52848 #[simd_test(enable = "avx512f")]
52849 unsafe fn test_mm512_maskz_broadcastss_ps() {
52850 let a = _mm_set_ps(17., 18., 19., 20.);
52851 let r = _mm512_maskz_broadcastss_ps(0, a);
52852 assert_eq_m512(r, _mm512_setzero_ps());
52853 let r = _mm512_maskz_broadcastss_ps(0b00000000_11111111, a);
52854 let e = _mm512_setr_ps(
52855 20., 20., 20., 20., 20., 20., 20., 20., 0., 0., 0., 0., 0., 0., 0., 0.,
52856 );
52857 assert_eq_m512(r, e);
52858 }
52859
52860 #[simd_test(enable = "avx512f,avx512vl")]
52861 unsafe fn test_mm256_mask_broadcastss_ps() {
52862 let src = _mm256_set1_ps(20.);
52863 let a = _mm_set_ps(17., 18., 19., 20.);
52864 let r = _mm256_mask_broadcastss_ps(src, 0, a);
52865 assert_eq_m256(r, src);
52866 let r = _mm256_mask_broadcastss_ps(src, 0b11111111, a);
52867 let e = _mm256_set1_ps(20.);
52868 assert_eq_m256(r, e);
52869 }
52870
52871 #[simd_test(enable = "avx512f,avx512vl")]
52872 unsafe fn test_mm256_maskz_broadcastss_ps() {
52873 let a = _mm_set_ps(17., 18., 19., 20.);
52874 let r = _mm256_maskz_broadcastss_ps(0, a);
52875 assert_eq_m256(r, _mm256_setzero_ps());
52876 let r = _mm256_maskz_broadcastss_ps(0b11111111, a);
52877 let e = _mm256_set1_ps(20.);
52878 assert_eq_m256(r, e);
52879 }
52880
52881 #[simd_test(enable = "avx512f,avx512vl")]
52882 unsafe fn test_mm_mask_broadcastss_ps() {
52883 let src = _mm_set1_ps(20.);
52884 let a = _mm_set_ps(17., 18., 19., 20.);
52885 let r = _mm_mask_broadcastss_ps(src, 0, a);
52886 assert_eq_m128(r, src);
52887 let r = _mm_mask_broadcastss_ps(src, 0b00001111, a);
52888 let e = _mm_set1_ps(20.);
52889 assert_eq_m128(r, e);
52890 }
52891
52892 #[simd_test(enable = "avx512f,avx512vl")]
52893 unsafe fn test_mm_maskz_broadcastss_ps() {
52894 let a = _mm_set_ps(17., 18., 19., 20.);
52895 let r = _mm_maskz_broadcastss_ps(0, a);
52896 assert_eq_m128(r, _mm_setzero_ps());
52897 let r = _mm_maskz_broadcastss_ps(0b00001111, a);
52898 let e = _mm_set1_ps(20.);
52899 assert_eq_m128(r, e);
52900 }
52901
52902 #[simd_test(enable = "avx512f")]
52903 unsafe fn test_mm512_broadcast_i32x4() {
52904 let a = _mm_set_epi32(17, 18, 19, 20);
52905 let r = _mm512_broadcast_i32x4(a);
52906 let e = _mm512_set_epi32(
52907 17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20,
52908 );
52909 assert_eq_m512i(r, e);
52910 }
52911
52912 #[simd_test(enable = "avx512f")]
52913 unsafe fn test_mm512_mask_broadcast_i32x4() {
52914 let src = _mm512_set1_epi32(20);
52915 let a = _mm_set_epi32(17, 18, 19, 20);
52916 let r = _mm512_mask_broadcast_i32x4(src, 0, a);
52917 assert_eq_m512i(r, src);
52918 let r = _mm512_mask_broadcast_i32x4(src, 0b11111111_11111111, a);
52919 let e = _mm512_set_epi32(
52920 17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20,
52921 );
52922 assert_eq_m512i(r, e);
52923 }
52924
52925 #[simd_test(enable = "avx512f")]
52926 unsafe fn test_mm512_maskz_broadcast_i32x4() {
52927 let a = _mm_set_epi32(17, 18, 19, 20);
52928 let r = _mm512_maskz_broadcast_i32x4(0, a);
52929 assert_eq_m512i(r, _mm512_setzero_si512());
52930 let r = _mm512_maskz_broadcast_i32x4(0b00000000_11111111, a);
52931 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 17, 18, 19, 20, 17, 18, 19, 20);
52932 assert_eq_m512i(r, e);
52933 }
52934
52935 #[simd_test(enable = "avx512f,avx512vl")]
52936 unsafe fn test_mm256_broadcast_i32x4() {
52937 let a = _mm_set_epi32(17, 18, 19, 20);
52938 let r = _mm256_broadcast_i32x4(a);
52939 let e = _mm256_set_epi32(17, 18, 19, 20, 17, 18, 19, 20);
52940 assert_eq_m256i(r, e);
52941 }
52942
52943 #[simd_test(enable = "avx512f,avx512vl")]
52944 unsafe fn test_mm256_mask_broadcast_i32x4() {
52945 let src = _mm256_set1_epi32(20);
52946 let a = _mm_set_epi32(17, 18, 19, 20);
52947 let r = _mm256_mask_broadcast_i32x4(src, 0, a);
52948 assert_eq_m256i(r, src);
52949 let r = _mm256_mask_broadcast_i32x4(src, 0b11111111, a);
52950 let e = _mm256_set_epi32(17, 18, 19, 20, 17, 18, 19, 20);
52951 assert_eq_m256i(r, e);
52952 }
52953
52954 #[simd_test(enable = "avx512f,avx512vl")]
52955 unsafe fn test_mm256_maskz_broadcast_i32x4() {
52956 let a = _mm_set_epi32(17, 18, 19, 20);
52957 let r = _mm256_maskz_broadcast_i32x4(0, a);
52958 assert_eq_m256i(r, _mm256_setzero_si256());
52959 let r = _mm256_maskz_broadcast_i32x4(0b11111111, a);
52960 let e = _mm256_set_epi32(17, 18, 19, 20, 17, 18, 19, 20);
52961 assert_eq_m256i(r, e);
52962 }
52963
52964 #[simd_test(enable = "avx512f")]
52965 unsafe fn test_mm512_broadcast_f32x4() {
52966 let a = _mm_set_ps(17., 18., 19., 20.);
52967 let r = _mm512_broadcast_f32x4(a);
52968 let e = _mm512_set_ps(
52969 17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20.,
52970 );
52971 assert_eq_m512(r, e);
52972 }
52973
52974 #[simd_test(enable = "avx512f")]
52975 unsafe fn test_mm512_mask_broadcast_f32x4() {
52976 let src = _mm512_set1_ps(20.);
52977 let a = _mm_set_ps(17., 18., 19., 20.);
52978 let r = _mm512_mask_broadcast_f32x4(src, 0, a);
52979 assert_eq_m512(r, src);
52980 let r = _mm512_mask_broadcast_f32x4(src, 0b11111111_11111111, a);
52981 let e = _mm512_set_ps(
52982 17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20.,
52983 );
52984 assert_eq_m512(r, e);
52985 }
52986
52987 #[simd_test(enable = "avx512f")]
52988 unsafe fn test_mm512_maskz_broadcast_f32x4() {
52989 let a = _mm_set_ps(17., 18., 19., 20.);
52990 let r = _mm512_maskz_broadcast_f32x4(0, a);
52991 assert_eq_m512(r, _mm512_setzero_ps());
52992 let r = _mm512_maskz_broadcast_f32x4(0b00000000_11111111, a);
52993 let e = _mm512_set_ps(
52994 0., 0., 0., 0., 0., 0., 0., 0., 17., 18., 19., 20., 17., 18., 19., 20.,
52995 );
52996 assert_eq_m512(r, e);
52997 }
52998
52999 #[simd_test(enable = "avx512f,avx512vl")]
53000 unsafe fn test_mm256_broadcast_f32x4() {
53001 let a = _mm_set_ps(17., 18., 19., 20.);
53002 let r = _mm256_broadcast_f32x4(a);
53003 let e = _mm256_set_ps(17., 18., 19., 20., 17., 18., 19., 20.);
53004 assert_eq_m256(r, e);
53005 }
53006
53007 #[simd_test(enable = "avx512f,avx512vl")]
53008 unsafe fn test_mm256_mask_broadcast_f32x4() {
53009 let src = _mm256_set1_ps(20.);
53010 let a = _mm_set_ps(17., 18., 19., 20.);
53011 let r = _mm256_mask_broadcast_f32x4(src, 0, a);
53012 assert_eq_m256(r, src);
53013 let r = _mm256_mask_broadcast_f32x4(src, 0b11111111, a);
53014 let e = _mm256_set_ps(17., 18., 19., 20., 17., 18., 19., 20.);
53015 assert_eq_m256(r, e);
53016 }
53017
53018 #[simd_test(enable = "avx512f,avx512vl")]
53019 unsafe fn test_mm256_maskz_broadcast_f32x4() {
53020 let a = _mm_set_ps(17., 18., 19., 20.);
53021 let r = _mm256_maskz_broadcast_f32x4(0, a);
53022 assert_eq_m256(r, _mm256_setzero_ps());
53023 let r = _mm256_maskz_broadcast_f32x4(0b11111111, a);
53024 let e = _mm256_set_ps(17., 18., 19., 20., 17., 18., 19., 20.);
53025 assert_eq_m256(r, e);
53026 }
53027
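    // blend takes each element from `b` where the mask bit is set and from `a` where it is
    // clear.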
53028 #[simd_test(enable = "avx512f")]
53029 unsafe fn test_mm512_mask_blend_epi32() {
53030 let a = _mm512_set1_epi32(1);
53031 let b = _mm512_set1_epi32(2);
53032 let r = _mm512_mask_blend_epi32(0b11111111_00000000, a, b);
53033 let e = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
53034 assert_eq_m512i(r, e);
53035 }
53036
53037 #[simd_test(enable = "avx512f,avx512vl")]
53038 unsafe fn test_mm256_mask_blend_epi32() {
53039 let a = _mm256_set1_epi32(1);
53040 let b = _mm256_set1_epi32(2);
53041 let r = _mm256_mask_blend_epi32(0b11111111, a, b);
53042 let e = _mm256_set1_epi32(2);
53043 assert_eq_m256i(r, e);
53044 }
53045
53046 #[simd_test(enable = "avx512f,avx512vl")]
53047 unsafe fn test_mm_mask_blend_epi32() {
53048 let a = _mm_set1_epi32(1);
53049 let b = _mm_set1_epi32(2);
53050 let r = _mm_mask_blend_epi32(0b00001111, a, b);
53051 let e = _mm_set1_epi32(2);
53052 assert_eq_m128i(r, e);
53053 }
53054
53055 #[simd_test(enable = "avx512f")]
53056 unsafe fn test_mm512_mask_blend_ps() {
53057 let a = _mm512_set1_ps(1.);
53058 let b = _mm512_set1_ps(2.);
53059 let r = _mm512_mask_blend_ps(0b11111111_00000000, a, b);
53060 let e = _mm512_set_ps(
53061 2., 2., 2., 2., 2., 2., 2., 2., 1., 1., 1., 1., 1., 1., 1., 1.,
53062 );
53063 assert_eq_m512(r, e);
53064 }
53065
53066 #[simd_test(enable = "avx512f,avx512vl")]
53067 unsafe fn test_mm256_mask_blend_ps() {
53068 let a = _mm256_set1_ps(1.);
53069 let b = _mm256_set1_ps(2.);
53070 let r = _mm256_mask_blend_ps(0b11111111, a, b);
53071 let e = _mm256_set1_ps(2.);
53072 assert_eq_m256(r, e);
53073 }
53074
53075 #[simd_test(enable = "avx512f,avx512vl")]
53076 unsafe fn test_mm_mask_blend_ps() {
53077 let a = _mm_set1_ps(1.);
53078 let b = _mm_set1_ps(2.);
53079 let r = _mm_mask_blend_ps(0b00001111, a, b);
53080 let e = _mm_set1_ps(2.);
53081 assert_eq_m128(r, e);
53082 }
53083
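    // unpackhi interleaves the upper halves of each 128-bit lane: per lane the result is
    // [a2, b2, a3, b3] in memory order.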
53084 #[simd_test(enable = "avx512f")]
53085 unsafe fn test_mm512_unpackhi_epi32() {
53086 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
53087 let b = _mm512_set_epi32(
53088 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
53089 );
53090 let r = _mm512_unpackhi_epi32(a, b);
53091 let e = _mm512_set_epi32(17, 1, 18, 2, 21, 5, 22, 6, 25, 9, 26, 10, 29, 13, 30, 14);
53092 assert_eq_m512i(r, e);
53093 }
53094
53095 #[simd_test(enable = "avx512f")]
53096 unsafe fn test_mm512_mask_unpackhi_epi32() {
53097 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
53098 let b = _mm512_set_epi32(
53099 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
53100 );
53101 let r = _mm512_mask_unpackhi_epi32(a, 0, a, b);
53102 assert_eq_m512i(r, a);
53103 let r = _mm512_mask_unpackhi_epi32(a, 0b11111111_11111111, a, b);
53104 let e = _mm512_set_epi32(17, 1, 18, 2, 21, 5, 22, 6, 25, 9, 26, 10, 29, 13, 30, 14);
53105 assert_eq_m512i(r, e);
53106 }
53107
53108 #[simd_test(enable = "avx512f")]
53109 unsafe fn test_mm512_maskz_unpackhi_epi32() {
53110 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
53111 let b = _mm512_set_epi32(
53112 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
53113 );
53114 let r = _mm512_maskz_unpackhi_epi32(0, a, b);
53115 assert_eq_m512i(r, _mm512_setzero_si512());
53116 let r = _mm512_maskz_unpackhi_epi32(0b00000000_11111111, a, b);
53117 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 25, 9, 26, 10, 29, 13, 30, 14);
53118 assert_eq_m512i(r, e);
53119 }
53120
53121 #[simd_test(enable = "avx512f,avx512vl")]
53122 unsafe fn test_mm256_mask_unpackhi_epi32() {
53123 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
53124 let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
53125 let r = _mm256_mask_unpackhi_epi32(a, 0, a, b);
53126 assert_eq_m256i(r, a);
53127 let r = _mm256_mask_unpackhi_epi32(a, 0b11111111, a, b);
53128 let e = _mm256_set_epi32(17, 1, 18, 2, 21, 5, 22, 6);
53129 assert_eq_m256i(r, e);
53130 }
53131
53132 #[simd_test(enable = "avx512f,avx512vl")]
53133 unsafe fn test_mm256_maskz_unpackhi_epi32() {
53134 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
53135 let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
53136 let r = _mm256_maskz_unpackhi_epi32(0, a, b);
53137 assert_eq_m256i(r, _mm256_setzero_si256());
53138 let r = _mm256_maskz_unpackhi_epi32(0b11111111, a, b);
53139 let e = _mm256_set_epi32(17, 1, 18, 2, 21, 5, 22, 6);
53140 assert_eq_m256i(r, e);
53141 }
53142
53143 #[simd_test(enable = "avx512f,avx512vl")]
53144 unsafe fn test_mm_mask_unpackhi_epi32() {
53145 let a = _mm_set_epi32(1, 2, 3, 4);
53146 let b = _mm_set_epi32(17, 18, 19, 20);
53147 let r = _mm_mask_unpackhi_epi32(a, 0, a, b);
53148 assert_eq_m128i(r, a);
53149 let r = _mm_mask_unpackhi_epi32(a, 0b00001111, a, b);
53150 let e = _mm_set_epi32(17, 1, 18, 2);
53151 assert_eq_m128i(r, e);
53152 }
53153
53154 #[simd_test(enable = "avx512f,avx512vl")]
53155 unsafe fn test_mm_maskz_unpackhi_epi32() {
53156 let a = _mm_set_epi32(1, 2, 3, 4);
53157 let b = _mm_set_epi32(17, 18, 19, 20);
53158 let r = _mm_maskz_unpackhi_epi32(0, a, b);
53159 assert_eq_m128i(r, _mm_setzero_si128());
53160 let r = _mm_maskz_unpackhi_epi32(0b00001111, a, b);
53161 let e = _mm_set_epi32(17, 1, 18, 2);
53162 assert_eq_m128i(r, e);
53163 }
53164
53165 #[simd_test(enable = "avx512f")]
53166 unsafe fn test_mm512_unpackhi_ps() {
53167 let a = _mm512_set_ps(
53168 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
53169 );
53170 let b = _mm512_set_ps(
53171 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
53172 );
53173 let r = _mm512_unpackhi_ps(a, b);
53174 let e = _mm512_set_ps(
53175 17., 1., 18., 2., 21., 5., 22., 6., 25., 9., 26., 10., 29., 13., 30., 14.,
53176 );
53177 assert_eq_m512(r, e);
53178 }
53179
53180 #[simd_test(enable = "avx512f")]
53181 unsafe fn test_mm512_mask_unpackhi_ps() {
53182 let a = _mm512_set_ps(
53183 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
53184 );
53185 let b = _mm512_set_ps(
53186 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
53187 );
53188 let r = _mm512_mask_unpackhi_ps(a, 0, a, b);
53189 assert_eq_m512(r, a);
53190 let r = _mm512_mask_unpackhi_ps(a, 0b11111111_11111111, a, b);
53191 let e = _mm512_set_ps(
53192 17., 1., 18., 2., 21., 5., 22., 6., 25., 9., 26., 10., 29., 13., 30., 14.,
53193 );
53194 assert_eq_m512(r, e);
53195 }
53196
53197 #[simd_test(enable = "avx512f")]
53198 unsafe fn test_mm512_maskz_unpackhi_ps() {
53199 let a = _mm512_set_ps(
53200 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
53201 );
53202 let b = _mm512_set_ps(
53203 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
53204 );
53205 let r = _mm512_maskz_unpackhi_ps(0, a, b);
53206 assert_eq_m512(r, _mm512_setzero_ps());
53207 let r = _mm512_maskz_unpackhi_ps(0b00000000_11111111, a, b);
53208 let e = _mm512_set_ps(
53209 0., 0., 0., 0., 0., 0., 0., 0., 25., 9., 26., 10., 29., 13., 30., 14.,
53210 );
53211 assert_eq_m512(r, e);
53212 }
53213
53214 #[simd_test(enable = "avx512f,avx512vl")]
53215 unsafe fn test_mm256_mask_unpackhi_ps() {
53216 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
53217 let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
53218 let r = _mm256_mask_unpackhi_ps(a, 0, a, b);
53219 assert_eq_m256(r, a);
53220 let r = _mm256_mask_unpackhi_ps(a, 0b11111111, a, b);
53221 let e = _mm256_set_ps(17., 1., 18., 2., 21., 5., 22., 6.);
53222 assert_eq_m256(r, e);
53223 }
53224
53225 #[simd_test(enable = "avx512f,avx512vl")]
53226 unsafe fn test_mm256_maskz_unpackhi_ps() {
53227 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
53228 let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
53229 let r = _mm256_maskz_unpackhi_ps(0, a, b);
53230 assert_eq_m256(r, _mm256_setzero_ps());
53231 let r = _mm256_maskz_unpackhi_ps(0b11111111, a, b);
53232 let e = _mm256_set_ps(17., 1., 18., 2., 21., 5., 22., 6.);
53233 assert_eq_m256(r, e);
53234 }
53235
53236 #[simd_test(enable = "avx512f,avx512vl")]
53237 unsafe fn test_mm_mask_unpackhi_ps() {
53238 let a = _mm_set_ps(1., 2., 3., 4.);
53239 let b = _mm_set_ps(17., 18., 19., 20.);
53240 let r = _mm_mask_unpackhi_ps(a, 0, a, b);
53241 assert_eq_m128(r, a);
53242 let r = _mm_mask_unpackhi_ps(a, 0b00001111, a, b);
53243 let e = _mm_set_ps(17., 1., 18., 2.);
53244 assert_eq_m128(r, e);
53245 }
53246
53247 #[simd_test(enable = "avx512f,avx512vl")]
53248 unsafe fn test_mm_maskz_unpackhi_ps() {
53249 let a = _mm_set_ps(1., 2., 3., 4.);
53250 let b = _mm_set_ps(17., 18., 19., 20.);
53251 let r = _mm_maskz_unpackhi_ps(0, a, b);
53252 assert_eq_m128(r, _mm_setzero_ps());
53253 let r = _mm_maskz_unpackhi_ps(0b00001111, a, b);
53254 let e = _mm_set_ps(17., 1., 18., 2.);
53255 assert_eq_m128(r, e);
53256 }
53257
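    // unpacklo does the same with the lower halves: per lane [a0, b0, a1, b1].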
53258 #[simd_test(enable = "avx512f")]
53259 unsafe fn test_mm512_unpacklo_epi32() {
53260 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
53261 let b = _mm512_set_epi32(
53262 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
53263 );
53264 let r = _mm512_unpacklo_epi32(a, b);
53265 let e = _mm512_set_epi32(19, 3, 20, 4, 23, 7, 24, 8, 27, 11, 28, 12, 31, 15, 32, 16);
53266 assert_eq_m512i(r, e);
53267 }
53268
53269 #[simd_test(enable = "avx512f")]
53270 unsafe fn test_mm512_mask_unpacklo_epi32() {
53271 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
53272 let b = _mm512_set_epi32(
53273 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
53274 );
53275 let r = _mm512_mask_unpacklo_epi32(a, 0, a, b);
53276 assert_eq_m512i(r, a);
53277 let r = _mm512_mask_unpacklo_epi32(a, 0b11111111_11111111, a, b);
53278 let e = _mm512_set_epi32(19, 3, 20, 4, 23, 7, 24, 8, 27, 11, 28, 12, 31, 15, 32, 16);
53279 assert_eq_m512i(r, e);
53280 }
53281
53282 #[simd_test(enable = "avx512f")]
53283 unsafe fn test_mm512_maskz_unpacklo_epi32() {
53284 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
53285 let b = _mm512_set_epi32(
53286 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
53287 );
53288 let r = _mm512_maskz_unpacklo_epi32(0, a, b);
53289 assert_eq_m512i(r, _mm512_setzero_si512());
53290 let r = _mm512_maskz_unpacklo_epi32(0b00000000_11111111, a, b);
53291 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 27, 11, 28, 12, 31, 15, 32, 16);
53292 assert_eq_m512i(r, e);
53293 }
53294
53295 #[simd_test(enable = "avx512f,avx512vl")]
53296 unsafe fn test_mm256_mask_unpacklo_epi32() {
53297 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
53298 let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
53299 let r = _mm256_mask_unpacklo_epi32(a, 0, a, b);
53300 assert_eq_m256i(r, a);
53301 let r = _mm256_mask_unpacklo_epi32(a, 0b11111111, a, b);
53302 let e = _mm256_set_epi32(19, 3, 20, 4, 23, 7, 24, 8);
53303 assert_eq_m256i(r, e);
53304 }
53305
53306 #[simd_test(enable = "avx512f,avx512vl")]
53307 unsafe fn test_mm256_maskz_unpacklo_epi32() {
53308 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
53309 let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
53310 let r = _mm256_maskz_unpacklo_epi32(0, a, b);
53311 assert_eq_m256i(r, _mm256_setzero_si256());
53312 let r = _mm256_maskz_unpacklo_epi32(0b11111111, a, b);
53313 let e = _mm256_set_epi32(19, 3, 20, 4, 23, 7, 24, 8);
53314 assert_eq_m256i(r, e);
53315 }
53316
53317 #[simd_test(enable = "avx512f,avx512vl")]
53318 unsafe fn test_mm_mask_unpacklo_epi32() {
53319 let a = _mm_set_epi32(1, 2, 3, 4);
53320 let b = _mm_set_epi32(17, 18, 19, 20);
53321 let r = _mm_mask_unpacklo_epi32(a, 0, a, b);
53322 assert_eq_m128i(r, a);
53323 let r = _mm_mask_unpacklo_epi32(a, 0b00001111, a, b);
53324 let e = _mm_set_epi32(19, 3, 20, 4);
53325 assert_eq_m128i(r, e);
53326 }
53327
53328 #[simd_test(enable = "avx512f,avx512vl")]
53329 unsafe fn test_mm_maskz_unpacklo_epi32() {
53330 let a = _mm_set_epi32(1, 2, 3, 4);
53331 let b = _mm_set_epi32(17, 18, 19, 20);
53332 let r = _mm_maskz_unpacklo_epi32(0, a, b);
53333 assert_eq_m128i(r, _mm_setzero_si128());
53334 let r = _mm_maskz_unpacklo_epi32(0b00001111, a, b);
53335 let e = _mm_set_epi32(19, 3, 20, 4);
53336 assert_eq_m128i(r, e);
53337 }
53338
53339 #[simd_test(enable = "avx512f")]
53340 unsafe fn test_mm512_unpacklo_ps() {
53341 let a = _mm512_set_ps(
53342 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
53343 );
53344 let b = _mm512_set_ps(
53345 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
53346 );
53347 let r = _mm512_unpacklo_ps(a, b);
53348 let e = _mm512_set_ps(
53349 19., 3., 20., 4., 23., 7., 24., 8., 27., 11., 28., 12., 31., 15., 32., 16.,
53350 );
53351 assert_eq_m512(r, e);
53352 }
53353
53354 #[simd_test(enable = "avx512f")]
53355 unsafe fn test_mm512_mask_unpacklo_ps() {
53356 let a = _mm512_set_ps(
53357 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
53358 );
53359 let b = _mm512_set_ps(
53360 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
53361 );
53362 let r = _mm512_mask_unpacklo_ps(a, 0, a, b);
53363 assert_eq_m512(r, a);
53364 let r = _mm512_mask_unpacklo_ps(a, 0b11111111_11111111, a, b);
53365 let e = _mm512_set_ps(
53366 19., 3., 20., 4., 23., 7., 24., 8., 27., 11., 28., 12., 31., 15., 32., 16.,
53367 );
53368 assert_eq_m512(r, e);
53369 }
53370
53371 #[simd_test(enable = "avx512f")]
53372 unsafe fn test_mm512_maskz_unpacklo_ps() {
53373 let a = _mm512_set_ps(
53374 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
53375 );
53376 let b = _mm512_set_ps(
53377 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
53378 );
53379 let r = _mm512_maskz_unpacklo_ps(0, a, b);
53380 assert_eq_m512(r, _mm512_setzero_ps());
53381 let r = _mm512_maskz_unpacklo_ps(0b00000000_11111111, a, b);
53382 let e = _mm512_set_ps(
53383 0., 0., 0., 0., 0., 0., 0., 0., 27., 11., 28., 12., 31., 15., 32., 16.,
53384 );
53385 assert_eq_m512(r, e);
53386 }
53387
53388 #[simd_test(enable = "avx512f,avx512vl")]
53389 unsafe fn test_mm256_mask_unpacklo_ps() {
53390 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
53391 let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
53392 let r = _mm256_mask_unpacklo_ps(a, 0, a, b);
53393 assert_eq_m256(r, a);
53394 let r = _mm256_mask_unpacklo_ps(a, 0b11111111, a, b);
53395 let e = _mm256_set_ps(19., 3., 20., 4., 23., 7., 24., 8.);
53396 assert_eq_m256(r, e);
53397 }
53398
53399 #[simd_test(enable = "avx512f,avx512vl")]
53400 unsafe fn test_mm256_maskz_unpacklo_ps() {
53401 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
53402 let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
53403 let r = _mm256_maskz_unpacklo_ps(0, a, b);
53404 assert_eq_m256(r, _mm256_setzero_ps());
53405 let r = _mm256_maskz_unpacklo_ps(0b11111111, a, b);
53406 let e = _mm256_set_ps(19., 3., 20., 4., 23., 7., 24., 8.);
53407 assert_eq_m256(r, e);
53408 }
53409
53410 #[simd_test(enable = "avx512f,avx512vl")]
53411 unsafe fn test_mm_mask_unpacklo_ps() {
53412 let a = _mm_set_ps(1., 2., 3., 4.);
53413 let b = _mm_set_ps(17., 18., 19., 20.);
53414 let r = _mm_mask_unpacklo_ps(a, 0, a, b);
53415 assert_eq_m128(r, a);
53416 let r = _mm_mask_unpacklo_ps(a, 0b00001111, a, b);
53417 let e = _mm_set_ps(19., 3., 20., 4.);
53418 assert_eq_m128(r, e);
53419 }
53420
53421 #[simd_test(enable = "avx512f,avx512vl")]
53422 unsafe fn test_mm_maskz_unpacklo_ps() {
53423 let a = _mm_set_ps(1., 2., 3., 4.);
53424 let b = _mm_set_ps(17., 18., 19., 20.);
53425 let r = _mm_maskz_unpacklo_ps(0, a, b);
53426 assert_eq_m128(r, _mm_setzero_ps());
53427 let r = _mm_maskz_unpacklo_ps(0b00001111, a, b);
53428 let e = _mm_set_ps(19., 3., 20., 4.);
53429 assert_eq_m128(r, e);
53430 }
53431
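    // alignr_epi32 concatenates `a` (high) and `b` (low) and shifts the pair right by IMM8
    // doublewords; as the `<16>` case below shows, the shift count wraps modulo the element
    // count in this implementation.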
53432 #[simd_test(enable = "avx512f")]
53433 unsafe fn test_mm512_alignr_epi32() {
53434 let a = _mm512_set_epi32(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
53435 let b = _mm512_set_epi32(
53436 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
53437 );
53438 let r = _mm512_alignr_epi32::<0>(a, b);
53439 assert_eq_m512i(r, b);
53440 let r = _mm512_alignr_epi32::<16>(a, b);
53441 assert_eq_m512i(r, b);
53442 let r = _mm512_alignr_epi32::<1>(a, b);
53443 let e = _mm512_set_epi32(
53444 1, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18,
53445 );
53446 assert_eq_m512i(r, e);
53447 }
53448
53449 #[simd_test(enable = "avx512f")]
53450 unsafe fn test_mm512_mask_alignr_epi32() {
53451 let a = _mm512_set_epi32(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
53452 let b = _mm512_set_epi32(
53453 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
53454 );
53455 let r = _mm512_mask_alignr_epi32::<1>(a, 0, a, b);
53456 assert_eq_m512i(r, a);
53457 let r = _mm512_mask_alignr_epi32::<1>(a, 0b11111111_11111111, a, b);
53458 let e = _mm512_set_epi32(
53459 1, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18,
53460 );
53461 assert_eq_m512i(r, e);
53462 }
53463
53464 #[simd_test(enable = "avx512f")]
53465 unsafe fn test_mm512_maskz_alignr_epi32() {
53466 let a = _mm512_set_epi32(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
53467 let b = _mm512_set_epi32(
53468 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
53469 );
53470 let r = _mm512_maskz_alignr_epi32::<1>(0, a, b);
53471 assert_eq_m512i(r, _mm512_setzero_si512());
53472 let r = _mm512_maskz_alignr_epi32::<1>(0b00000000_11111111, a, b);
53473 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 25, 24, 23, 22, 21, 20, 19, 18);
53474 assert_eq_m512i(r, e);
53475 }
53476
53477 #[simd_test(enable = "avx512f,avx512vl")]
53478 unsafe fn test_mm256_alignr_epi32() {
53479 let a = _mm256_set_epi32(8, 7, 6, 5, 4, 3, 2, 1);
53480 let b = _mm256_set_epi32(16, 15, 14, 13, 12, 11, 10, 9);
53481 let r = _mm256_alignr_epi32::<0>(a, b);
53482 assert_eq_m256i(r, b);
53483 let r = _mm256_alignr_epi32::<1>(a, b);
53484 let e = _mm256_set_epi32(1, 16, 15, 14, 13, 12, 11, 10);
53485 assert_eq_m256i(r, e);
53486 }
53487
53488 #[simd_test(enable = "avx512f,avx512vl")]
53489 unsafe fn test_mm256_mask_alignr_epi32() {
53490 let a = _mm256_set_epi32(8, 7, 6, 5, 4, 3, 2, 1);
53491 let b = _mm256_set_epi32(16, 15, 14, 13, 12, 11, 10, 9);
53492 let r = _mm256_mask_alignr_epi32::<1>(a, 0, a, b);
53493 assert_eq_m256i(r, a);
53494 let r = _mm256_mask_alignr_epi32::<1>(a, 0b11111111, a, b);
53495 let e = _mm256_set_epi32(1, 16, 15, 14, 13, 12, 11, 10);
53496 assert_eq_m256i(r, e);
53497 }
53498
53499 #[simd_test(enable = "avx512f,avx512vl")]
53500 unsafe fn test_mm256_maskz_alignr_epi32() {
53501 let a = _mm256_set_epi32(8, 7, 6, 5, 4, 3, 2, 1);
53502 let b = _mm256_set_epi32(16, 15, 14, 13, 12, 11, 10, 9);
53503 let r = _mm256_maskz_alignr_epi32::<1>(0, a, b);
53504 assert_eq_m256i(r, _mm256_setzero_si256());
53505 let r = _mm256_maskz_alignr_epi32::<1>(0b11111111, a, b);
53506 let e = _mm256_set_epi32(1, 16, 15, 14, 13, 12, 11, 10);
53507 assert_eq_m256i(r, e);
53508 }
53509
53510 #[simd_test(enable = "avx512f,avx512vl")]
53511 unsafe fn test_mm_alignr_epi32() {
53512 let a = _mm_set_epi32(4, 3, 2, 1);
53513 let b = _mm_set_epi32(8, 7, 6, 5);
53514 let r = _mm_alignr_epi32::<0>(a, b);
53515 assert_eq_m128i(r, b);
53516 let r = _mm_alignr_epi32::<1>(a, b);
53517 let e = _mm_set_epi32(1, 8, 7, 6);
53518 assert_eq_m128i(r, e);
53519 }
53520
53521 #[simd_test(enable = "avx512f,avx512vl")]
53522 unsafe fn test_mm_mask_alignr_epi32() {
53523 let a = _mm_set_epi32(4, 3, 2, 1);
53524 let b = _mm_set_epi32(8, 7, 6, 5);
53525 let r = _mm_mask_alignr_epi32::<1>(a, 0, a, b);
53526 assert_eq_m128i(r, a);
53527 let r = _mm_mask_alignr_epi32::<1>(a, 0b00001111, a, b);
53528 let e = _mm_set_epi32(1, 8, 7, 6);
53529 assert_eq_m128i(r, e);
53530 }
53531
53532 #[simd_test(enable = "avx512f,avx512vl")]
53533 unsafe fn test_mm_maskz_alignr_epi32() {
53534 let a = _mm_set_epi32(4, 3, 2, 1);
53535 let b = _mm_set_epi32(8, 7, 6, 5);
53536 let r = _mm_maskz_alignr_epi32::<1>(0, a, b);
53537 assert_eq_m128i(r, _mm_setzero_si128());
53538 let r = _mm_maskz_alignr_epi32::<1>(0b00001111, a, b);
53539 let e = _mm_set_epi32(1, 8, 7, 6);
53540 assert_eq_m128i(r, e);
53541 }
53542
53543 #[simd_test(enable = "avx512f")]
53544 unsafe fn test_mm512_and_epi32() {
53545 #[rustfmt::skip]
53546 let a = _mm512_set_epi32(
53547 1 << 1 | 1 << 2, 0, 0, 0,
53548 0, 0, 0, 0,
53549 0, 0, 0, 0,
53550 0, 0, 0, 1 << 1 | 1 << 3,
53551 );
53552 #[rustfmt::skip]
53553 let b = _mm512_set_epi32(
53554 1 << 1, 0, 0, 0,
53555 0, 0, 0, 0,
53556 0, 0, 0, 0,
53557 0, 0, 0, 1 << 3 | 1 << 4,
53558 );
53559 let r = _mm512_and_epi32(a, b);
53560 let e = _mm512_set_epi32(1 << 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3);
53561 assert_eq_m512i(r, e);
53562 }
53563
53564 #[simd_test(enable = "avx512f")]
53565 unsafe fn test_mm512_mask_and_epi32() {
53566 #[rustfmt::skip]
53567 let a = _mm512_set_epi32(
53568 1 << 1 | 1 << 2, 0, 0, 0,
53569 0, 0, 0, 0,
53570 0, 0, 0, 0,
53571 0, 0, 0, 1 << 1 | 1 << 3,
53572 );
53573 #[rustfmt::skip]
53574 let b = _mm512_set_epi32(
53575 1 << 1, 0, 0, 0,
53576 0, 0, 0, 0,
53577 0, 0, 0, 0,
53578 0, 0, 0, 1 << 3 | 1 << 4,
53579 );
53580 let r = _mm512_mask_and_epi32(a, 0, a, b);
53581 assert_eq_m512i(r, a);
53582 let r = _mm512_mask_and_epi32(a, 0b01111111_11111111, a, b);
53583 #[rustfmt::skip]
53584 let e = _mm512_set_epi32(
53585 1 << 1 | 1 << 2, 0, 0, 0,
53586 0, 0, 0, 0,
53587 0, 0, 0, 0,
53588 0, 0, 0, 1 << 3,
53589 );
53590 assert_eq_m512i(r, e);
53591 }
53592
53593 #[simd_test(enable = "avx512f")]
53594 unsafe fn test_mm512_maskz_and_epi32() {
53595 #[rustfmt::skip]
53596 let a = _mm512_set_epi32(
53597 1 << 1 | 1 << 2, 0, 0, 0,
53598 0, 0, 0, 0,
53599 0, 0, 0, 0,
53600 0, 0, 0, 1 << 1 | 1 << 3,
53601 );
53602 #[rustfmt::skip]
53603 let b = _mm512_set_epi32(
53604 1 << 1, 0, 0, 0,
53605 0, 0, 0, 0,
53606 0, 0, 0, 0,
53607 0, 0, 0, 1 << 3 | 1 << 4,
53608 );
53609 let r = _mm512_maskz_and_epi32(0, a, b);
53610 assert_eq_m512i(r, _mm512_setzero_si512());
53611 let r = _mm512_maskz_and_epi32(0b00000000_11111111, a, b);
53612 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3);
53613 assert_eq_m512i(r, e);
53614 }
53615
53616 #[simd_test(enable = "avx512f,avx512vl")]
53617 unsafe fn test_mm256_mask_and_epi32() {
53618 let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
53619 let b = _mm256_set1_epi32(1 << 1);
53620 let r = _mm256_mask_and_epi32(a, 0, a, b);
53621 assert_eq_m256i(r, a);
53622 let r = _mm256_mask_and_epi32(a, 0b11111111, a, b);
53623 let e = _mm256_set1_epi32(1 << 1);
53624 assert_eq_m256i(r, e);
53625 }
53626
53627 #[simd_test(enable = "avx512f,avx512vl")]
53628 unsafe fn test_mm256_maskz_and_epi32() {
53629 let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
53630 let b = _mm256_set1_epi32(1 << 1);
53631 let r = _mm256_maskz_and_epi32(0, a, b);
53632 assert_eq_m256i(r, _mm256_setzero_si256());
53633 let r = _mm256_maskz_and_epi32(0b11111111, a, b);
53634 let e = _mm256_set1_epi32(1 << 1);
53635 assert_eq_m256i(r, e);
53636 }
53637
53638 #[simd_test(enable = "avx512f,avx512vl")]
53639 unsafe fn test_mm_mask_and_epi32() {
53640 let a = _mm_set1_epi32(1 << 1 | 1 << 2);
53641 let b = _mm_set1_epi32(1 << 1);
53642 let r = _mm_mask_and_epi32(a, 0, a, b);
53643 assert_eq_m128i(r, a);
53644 let r = _mm_mask_and_epi32(a, 0b00001111, a, b);
53645 let e = _mm_set1_epi32(1 << 1);
53646 assert_eq_m128i(r, e);
53647 }
53648
53649 #[simd_test(enable = "avx512f,avx512vl")]
53650 unsafe fn test_mm_maskz_and_epi32() {
53651 let a = _mm_set1_epi32(1 << 1 | 1 << 2);
53652 let b = _mm_set1_epi32(1 << 1);
53653 let r = _mm_maskz_and_epi32(0, a, b);
53654 assert_eq_m128i(r, _mm_setzero_si128());
53655 let r = _mm_maskz_and_epi32(0b00001111, a, b);
53656 let e = _mm_set1_epi32(1 << 1);
53657 assert_eq_m128i(r, e);
53658 }
53659
53660 #[simd_test(enable = "avx512f")]
53661 unsafe fn test_mm512_and_si512() {
53662 #[rustfmt::skip]
53663 let a = _mm512_set_epi32(
53664 1 << 1 | 1 << 2, 0, 0, 0,
53665 0, 0, 0, 0,
53666 0, 0, 0, 0,
53667 0, 0, 0, 1 << 1 | 1 << 3,
53668 );
53669 #[rustfmt::skip]
53670 let b = _mm512_set_epi32(
53671 1 << 1, 0, 0, 0,
53672 0, 0, 0, 0,
53673 0, 0, 0, 0,
53674 0, 0, 0, 1 << 3 | 1 << 4,
53675 );
        let r = _mm512_and_si512(a, b);
53677 let e = _mm512_set_epi32(1 << 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3);
53678 assert_eq_m512i(r, e);
53679 }
53680
53681 #[simd_test(enable = "avx512f")]
53682 unsafe fn test_mm512_or_epi32() {
53683 #[rustfmt::skip]
53684 let a = _mm512_set_epi32(
53685 1 << 1 | 1 << 2, 0, 0, 0,
53686 0, 0, 0, 0,
53687 0, 0, 0, 0,
53688 0, 0, 0, 1 << 1 | 1 << 3,
53689 );
53690 #[rustfmt::skip]
53691 let b = _mm512_set_epi32(
53692 1 << 1, 0, 0, 0,
53693 0, 0, 0, 0,
53694 0, 0, 0, 0,
53695 0, 0, 0, 1 << 3 | 1 << 4,
53696 );
53697 let r = _mm512_or_epi32(a, b);
53698 #[rustfmt::skip]
53699 let e = _mm512_set_epi32(
53700 1 << 1 | 1 << 2, 0, 0, 0,
53701 0, 0, 0, 0,
53702 0, 0, 0, 0,
53703 0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
53704 );
53705 assert_eq_m512i(r, e);
53706 }
53707
53708 #[simd_test(enable = "avx512f")]
53709 unsafe fn test_mm512_mask_or_epi32() {
53710 #[rustfmt::skip]
53711 let a = _mm512_set_epi32(
53712 1 << 1 | 1 << 2, 0, 0, 0,
53713 0, 0, 0, 0,
53714 0, 0, 0, 0,
53715 0, 0, 0, 1 << 1 | 1 << 3,
53716 );
53717 #[rustfmt::skip]
53718 let b = _mm512_set_epi32(
53719 1 << 1, 0, 0, 0,
53720 0, 0, 0, 0,
53721 0, 0, 0, 0,
53722 0, 0, 0, 1 << 3 | 1 << 4,
53723 );
53724 let r = _mm512_mask_or_epi32(a, 0, a, b);
53725 assert_eq_m512i(r, a);
53726 let r = _mm512_mask_or_epi32(a, 0b11111111_11111111, a, b);
53727 #[rustfmt::skip]
53728 let e = _mm512_set_epi32(
53729 1 << 1 | 1 << 2, 0, 0, 0,
53730 0, 0, 0, 0,
53731 0, 0, 0, 0,
53732 0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
53733 );
53734 assert_eq_m512i(r, e);
53735 }
53736
53737 #[simd_test(enable = "avx512f")]
53738 unsafe fn test_mm512_maskz_or_epi32() {
53739 #[rustfmt::skip]
53740 let a = _mm512_set_epi32(
53741 1 << 1 | 1 << 2, 0, 0, 0,
53742 0, 0, 0, 0,
53743 0, 0, 0, 0,
53744 0, 0, 0, 1 << 1 | 1 << 3,
53745 );
53746 #[rustfmt::skip]
53747 let b = _mm512_set_epi32(
53748 1 << 1, 0, 0, 0,
53749 0, 0, 0, 0,
53750 0, 0, 0, 0,
53751 0, 0, 0, 1 << 3 | 1 << 4,
53752 );
53753 let r = _mm512_maskz_or_epi32(0, a, b);
53754 assert_eq_m512i(r, _mm512_setzero_si512());
53755 let r = _mm512_maskz_or_epi32(0b00000000_11111111, a, b);
53756 #[rustfmt::skip]
53757 let e = _mm512_set_epi32(
53758 0, 0, 0, 0,
53759 0, 0, 0, 0,
53760 0, 0, 0, 0,
53761 0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
53762 );
53763 assert_eq_m512i(r, e);
53764 }
53765
53766 #[simd_test(enable = "avx512f,avx512vl")]
53767 unsafe fn test_mm256_or_epi32() {
53768 let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
53769 let b = _mm256_set1_epi32(1 << 1);
53770 let r = _mm256_or_epi32(a, b);
53771 let e = _mm256_set1_epi32(1 << 1 | 1 << 2);
53772 assert_eq_m256i(r, e);
53773 }
53774
53775 #[simd_test(enable = "avx512f,avx512vl")]
53776 unsafe fn test_mm256_mask_or_epi32() {
53777 let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
53778 let b = _mm256_set1_epi32(1 << 1);
53779 let r = _mm256_mask_or_epi32(a, 0, a, b);
53780 assert_eq_m256i(r, a);
53781 let r = _mm256_mask_or_epi32(a, 0b11111111, a, b);
53782 let e = _mm256_set1_epi32(1 << 1 | 1 << 2);
53783 assert_eq_m256i(r, e);
53784 }
53785
53786 #[simd_test(enable = "avx512f,avx512vl")]
53787 unsafe fn test_mm256_maskz_or_epi32() {
53788 let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
53789 let b = _mm256_set1_epi32(1 << 1);
53790 let r = _mm256_maskz_or_epi32(0, a, b);
53791 assert_eq_m256i(r, _mm256_setzero_si256());
53792 let r = _mm256_maskz_or_epi32(0b11111111, a, b);
53793 let e = _mm256_set1_epi32(1 << 1 | 1 << 2);
53794 assert_eq_m256i(r, e);
53795 }
53796
53797 #[simd_test(enable = "avx512f,avx512vl")]
53798 unsafe fn test_mm_or_epi32() {
53799 let a = _mm_set1_epi32(1 << 1 | 1 << 2);
53800 let b = _mm_set1_epi32(1 << 1);
53801 let r = _mm_or_epi32(a, b);
53802 let e = _mm_set1_epi32(1 << 1 | 1 << 2);
53803 assert_eq_m128i(r, e);
53804 }
53805
53806 #[simd_test(enable = "avx512f,avx512vl")]
53807 unsafe fn test_mm_mask_or_epi32() {
53808 let a = _mm_set1_epi32(1 << 1 | 1 << 2);
53809 let b = _mm_set1_epi32(1 << 1);
53810 let r = _mm_mask_or_epi32(a, 0, a, b);
53811 assert_eq_m128i(r, a);
53812 let r = _mm_mask_or_epi32(a, 0b00001111, a, b);
53813 let e = _mm_set1_epi32(1 << 1 | 1 << 2);
53814 assert_eq_m128i(r, e);
53815 }
53816
53817 #[simd_test(enable = "avx512f,avx512vl")]
53818 unsafe fn test_mm_maskz_or_epi32() {
53819 let a = _mm_set1_epi32(1 << 1 | 1 << 2);
53820 let b = _mm_set1_epi32(1 << 1);
53821 let r = _mm_maskz_or_epi32(0, a, b);
53822 assert_eq_m128i(r, _mm_setzero_si128());
53823 let r = _mm_maskz_or_epi32(0b00001111, a, b);
53824 let e = _mm_set1_epi32(1 << 1 | 1 << 2);
53825 assert_eq_m128i(r, e);
53826 }
53827
53828 #[simd_test(enable = "avx512f")]
53829 unsafe fn test_mm512_or_si512() {
53830 #[rustfmt::skip]
53831 let a = _mm512_set_epi32(
53832 1 << 1 | 1 << 2, 0, 0, 0,
53833 0, 0, 0, 0,
53834 0, 0, 0, 0,
53835 0, 0, 0, 1 << 1 | 1 << 3,
53836 );
53837 #[rustfmt::skip]
53838 let b = _mm512_set_epi32(
53839 1 << 1, 0, 0, 0,
53840 0, 0, 0, 0,
53841 0, 0, 0, 0,
53842 0, 0, 0, 1 << 3 | 1 << 4,
53843 );
        let r = _mm512_or_si512(a, b);
53845 #[rustfmt::skip]
53846 let e = _mm512_set_epi32(
53847 1 << 1 | 1 << 2, 0, 0, 0,
53848 0, 0, 0, 0,
53849 0, 0, 0, 0,
53850 0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
53851 );
53852 assert_eq_m512i(r, e);
53853 }
53854
53855 #[simd_test(enable = "avx512f")]
53856 unsafe fn test_mm512_xor_epi32() {
53857 #[rustfmt::skip]
53858 let a = _mm512_set_epi32(
53859 1 << 1 | 1 << 2, 0, 0, 0,
53860 0, 0, 0, 0,
53861 0, 0, 0, 0,
53862 0, 0, 0, 1 << 1 | 1 << 3,
53863 );
53864 #[rustfmt::skip]
53865 let b = _mm512_set_epi32(
53866 1 << 1, 0, 0, 0,
53867 0, 0, 0, 0,
53868 0, 0, 0, 0,
53869 0, 0, 0, 1 << 3 | 1 << 4,
53870 );
53871 let r = _mm512_xor_epi32(a, b);
53872 #[rustfmt::skip]
53873 let e = _mm512_set_epi32(
53874 1 << 2, 0, 0, 0,
53875 0, 0, 0, 0,
53876 0, 0, 0, 0,
53877 0, 0, 0, 1 << 1 | 1 << 4,
53878 );
53879 assert_eq_m512i(r, e);
53880 }
53881
53882 #[simd_test(enable = "avx512f")]
53883 unsafe fn test_mm512_mask_xor_epi32() {
53884 #[rustfmt::skip]
53885 let a = _mm512_set_epi32(
53886 1 << 1 | 1 << 2, 0, 0, 0,
53887 0, 0, 0, 0,
53888 0, 0, 0, 0,
53889 0, 0, 0, 1 << 1 | 1 << 3,
53890 );
53891 #[rustfmt::skip]
53892 let b = _mm512_set_epi32(
53893 1 << 1, 0, 0, 0,
53894 0, 0, 0, 0,
53895 0, 0, 0, 0,
53896 0, 0, 0, 1 << 3 | 1 << 4,
53897 );
53898 let r = _mm512_mask_xor_epi32(a, 0, a, b);
53899 assert_eq_m512i(r, a);
53900 let r = _mm512_mask_xor_epi32(a, 0b01111111_11111111, a, b);
53901 #[rustfmt::skip]
53902 let e = _mm512_set_epi32(
53903 1 << 1 | 1 << 2, 0, 0, 0,
53904 0, 0, 0, 0,
53905 0, 0, 0, 0,
53906 0, 0, 0, 1 << 1 | 1 << 4,
53907 );
53908 assert_eq_m512i(r, e);
53909 }
53910
53911 #[simd_test(enable = "avx512f")]
53912 unsafe fn test_mm512_maskz_xor_epi32() {
53913 #[rustfmt::skip]
53914 let a = _mm512_set_epi32(
53915 1 << 1 | 1 << 2, 0, 0, 0,
53916 0, 0, 0, 0,
53917 0, 0, 0, 0,
53918 0, 0, 0, 1 << 1 | 1 << 3,
53919 );
53920 #[rustfmt::skip]
53921 let b = _mm512_set_epi32(
53922 1 << 1, 0, 0, 0,
53923 0, 0, 0, 0,
53924 0, 0, 0, 0,
53925 0, 0, 0, 1 << 3 | 1 << 4,
53926 );
53927 let r = _mm512_maskz_xor_epi32(0, a, b);
53928 assert_eq_m512i(r, _mm512_setzero_si512());
53929 let r = _mm512_maskz_xor_epi32(0b00000000_11111111, a, b);
53930 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 4);
53931 assert_eq_m512i(r, e);
53932 }
53933
53934 #[simd_test(enable = "avx512f,avx512vl")]
53935 unsafe fn test_mm256_xor_epi32() {
53936 let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
53937 let b = _mm256_set1_epi32(1 << 1);
53938 let r = _mm256_xor_epi32(a, b);
53939 let e = _mm256_set1_epi32(1 << 2);
53940 assert_eq_m256i(r, e);
53941 }
53942
53943 #[simd_test(enable = "avx512f,avx512vl")]
53944 unsafe fn test_mm256_mask_xor_epi32() {
53945 let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
53946 let b = _mm256_set1_epi32(1 << 1);
53947 let r = _mm256_mask_xor_epi32(a, 0, a, b);
53948 assert_eq_m256i(r, a);
53949 let r = _mm256_mask_xor_epi32(a, 0b11111111, a, b);
53950 let e = _mm256_set1_epi32(1 << 2);
53951 assert_eq_m256i(r, e);
53952 }
53953
53954 #[simd_test(enable = "avx512f,avx512vl")]
53955 unsafe fn test_mm256_maskz_xor_epi32() {
53956 let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
53957 let b = _mm256_set1_epi32(1 << 1);
53958 let r = _mm256_maskz_xor_epi32(0, a, b);
53959 assert_eq_m256i(r, _mm256_setzero_si256());
53960 let r = _mm256_maskz_xor_epi32(0b11111111, a, b);
53961 let e = _mm256_set1_epi32(1 << 2);
53962 assert_eq_m256i(r, e);
53963 }
53964
53965 #[simd_test(enable = "avx512f,avx512vl")]
53966 unsafe fn test_mm_xor_epi32() {
53967 let a = _mm_set1_epi32(1 << 1 | 1 << 2);
53968 let b = _mm_set1_epi32(1 << 1);
53969 let r = _mm_xor_epi32(a, b);
53970 let e = _mm_set1_epi32(1 << 2);
53971 assert_eq_m128i(r, e);
53972 }
53973
53974 #[simd_test(enable = "avx512f,avx512vl")]
53975 unsafe fn test_mm_mask_xor_epi32() {
53976 let a = _mm_set1_epi32(1 << 1 | 1 << 2);
53977 let b = _mm_set1_epi32(1 << 1);
53978 let r = _mm_mask_xor_epi32(a, 0, a, b);
53979 assert_eq_m128i(r, a);
53980 let r = _mm_mask_xor_epi32(a, 0b00001111, a, b);
53981 let e = _mm_set1_epi32(1 << 2);
53982 assert_eq_m128i(r, e);
53983 }
53984
53985 #[simd_test(enable = "avx512f,avx512vl")]
53986 unsafe fn test_mm_maskz_xor_epi32() {
53987 let a = _mm_set1_epi32(1 << 1 | 1 << 2);
53988 let b = _mm_set1_epi32(1 << 1);
53989 let r = _mm_maskz_xor_epi32(0, a, b);
53990 assert_eq_m128i(r, _mm_setzero_si128());
53991 let r = _mm_maskz_xor_epi32(0b00001111, a, b);
53992 let e = _mm_set1_epi32(1 << 2);
53993 assert_eq_m128i(r, e);
53994 }
53995
53996 #[simd_test(enable = "avx512f")]
53997 unsafe fn test_mm512_xor_si512() {
53998 #[rustfmt::skip]
53999 let a = _mm512_set_epi32(
54000 1 << 1 | 1 << 2, 0, 0, 0,
54001 0, 0, 0, 0,
54002 0, 0, 0, 0,
54003 0, 0, 0, 1 << 1 | 1 << 3,
54004 );
54005 #[rustfmt::skip]
54006 let b = _mm512_set_epi32(
54007 1 << 1, 0, 0, 0,
54008 0, 0, 0, 0,
54009 0, 0, 0, 0,
54010 0, 0, 0, 1 << 3 | 1 << 4,
54011 );
        let r = _mm512_xor_si512(a, b);
54013 #[rustfmt::skip]
54014 let e = _mm512_set_epi32(
54015 1 << 2, 0, 0, 0,
54016 0, 0, 0, 0,
54017 0, 0, 0, 0,
54018 0, 0, 0, 1 << 1 | 1 << 4,
54019 );
54020 assert_eq_m512i(r, e);
54021 }
54022
54023 #[simd_test(enable = "avx512f")]
54024 unsafe fn test_mm512_andnot_epi32() {
54025 let a = _mm512_set1_epi32(0);
54026 let b = _mm512_set1_epi32(1 << 3 | 1 << 4);
54027 let r = _mm512_andnot_epi32(a, b);
54028 let e = _mm512_set1_epi32(1 << 3 | 1 << 4);
54029 assert_eq_m512i(r, e);
54030 }
54031
54032 #[simd_test(enable = "avx512f")]
54033 unsafe fn test_mm512_mask_andnot_epi32() {
54034 let a = _mm512_set1_epi32(1 << 1 | 1 << 2);
54035 let b = _mm512_set1_epi32(1 << 3 | 1 << 4);
54036 let r = _mm512_mask_andnot_epi32(a, 0, a, b);
54037 assert_eq_m512i(r, a);
54038 let r = _mm512_mask_andnot_epi32(a, 0b11111111_11111111, a, b);
54039 let e = _mm512_set1_epi32(1 << 3 | 1 << 4);
54040 assert_eq_m512i(r, e);
54041 }
54042
54043 #[simd_test(enable = "avx512f")]
54044 unsafe fn test_mm512_maskz_andnot_epi32() {
54045 let a = _mm512_set1_epi32(1 << 1 | 1 << 2);
54046 let b = _mm512_set1_epi32(1 << 3 | 1 << 4);
54047 let r = _mm512_maskz_andnot_epi32(0, a, b);
54048 assert_eq_m512i(r, _mm512_setzero_si512());
54049 let r = _mm512_maskz_andnot_epi32(0b00000000_11111111, a, b);
54050 #[rustfmt::skip]
54051 let e = _mm512_set_epi32(
54052 0, 0, 0, 0,
54053 0, 0, 0, 0,
54054 1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4,
54055 1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4,
54056 );
54057 assert_eq_m512i(r, e);
54058 }
54059
54060 #[simd_test(enable = "avx512f,avx512vl")]
54061 unsafe fn test_mm256_mask_andnot_epi32() {
54062 let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
54063 let b = _mm256_set1_epi32(1 << 3 | 1 << 4);
54064 let r = _mm256_mask_andnot_epi32(a, 0, a, b);
54065 assert_eq_m256i(r, a);
54066 let r = _mm256_mask_andnot_epi32(a, 0b11111111, a, b);
54067 let e = _mm256_set1_epi32(1 << 3 | 1 << 4);
54068 assert_eq_m256i(r, e);
54069 }
54070
54071 #[simd_test(enable = "avx512f,avx512vl")]
54072 unsafe fn test_mm256_maskz_andnot_epi32() {
54073 let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
54074 let b = _mm256_set1_epi32(1 << 3 | 1 << 4);
54075 let r = _mm256_maskz_andnot_epi32(0, a, b);
54076 assert_eq_m256i(r, _mm256_setzero_si256());
54077 let r = _mm256_maskz_andnot_epi32(0b11111111, a, b);
54078 let e = _mm256_set1_epi32(1 << 3 | 1 << 4);
54079 assert_eq_m256i(r, e);
54080 }
54081
54082 #[simd_test(enable = "avx512f,avx512vl")]
54083 unsafe fn test_mm_mask_andnot_epi32() {
54084 let a = _mm_set1_epi32(1 << 1 | 1 << 2);
54085 let b = _mm_set1_epi32(1 << 3 | 1 << 4);
54086 let r = _mm_mask_andnot_epi32(a, 0, a, b);
54087 assert_eq_m128i(r, a);
54088 let r = _mm_mask_andnot_epi32(a, 0b00001111, a, b);
54089 let e = _mm_set1_epi32(1 << 3 | 1 << 4);
54090 assert_eq_m128i(r, e);
54091 }
54092
54093 #[simd_test(enable = "avx512f,avx512vl")]
54094 unsafe fn test_mm_maskz_andnot_epi32() {
54095 let a = _mm_set1_epi32(1 << 1 | 1 << 2);
54096 let b = _mm_set1_epi32(1 << 3 | 1 << 4);
54097 let r = _mm_maskz_andnot_epi32(0, a, b);
54098 assert_eq_m128i(r, _mm_setzero_si128());
54099 let r = _mm_maskz_andnot_epi32(0b00001111, a, b);
54100 let e = _mm_set1_epi32(1 << 3 | 1 << 4);
54101 assert_eq_m128i(r, e);
54102 }
54103
54104 #[simd_test(enable = "avx512f")]
54105 unsafe fn test_mm512_kand() {
54106 let a: u16 = 0b11001100_00110011;
54107 let b: u16 = 0b11001100_00110011;
54108 let r = _mm512_kand(a, b);
54109 let e: u16 = 0b11001100_00110011;
54110 assert_eq!(r, e);
54111 }
54112
54113 #[simd_test(enable = "avx512f")]
54114 unsafe fn test_kand_mask16() {
54115 let a: u16 = 0b11001100_00110011;
54116 let b: u16 = 0b11001100_00110011;
54117 let r = _kand_mask16(a, b);
54118 let e: u16 = 0b11001100_00110011;
54119 assert_eq!(r, e);
54120 }
54121
54122 #[simd_test(enable = "avx512f")]
54123 unsafe fn test_mm512_kor() {
54124 let a: u16 = 0b11001100_00110011;
54125 let b: u16 = 0b00101110_00001011;
54126 let r = _mm512_kor(a, b);
54127 let e: u16 = 0b11101110_00111011;
54128 assert_eq!(r, e);
54129 }
54130
54131 #[simd_test(enable = "avx512f")]
54132 unsafe fn test_kor_mask16() {
54133 let a: u16 = 0b11001100_00110011;
54134 let b: u16 = 0b00101110_00001011;
54135 let r = _kor_mask16(a, b);
54136 let e: u16 = 0b11101110_00111011;
54137 assert_eq!(r, e);
54138 }
54139
54140 #[simd_test(enable = "avx512f")]
54141 unsafe fn test_mm512_kxor() {
54142 let a: u16 = 0b11001100_00110011;
54143 let b: u16 = 0b00101110_00001011;
54144 let r = _mm512_kxor(a, b);
54145 let e: u16 = 0b11100010_00111000;
54146 assert_eq!(r, e);
54147 }
54148
54149 #[simd_test(enable = "avx512f")]
54150 unsafe fn test_kxor_mask16() {
54151 let a: u16 = 0b11001100_00110011;
54152 let b: u16 = 0b00101110_00001011;
54153 let r = _kxor_mask16(a, b);
54154 let e: u16 = 0b11100010_00111000;
54155 assert_eq!(r, e);
54156 }
54157
54158 #[simd_test(enable = "avx512f")]
54159 unsafe fn test_mm512_knot() {
54160 let a: u16 = 0b11001100_00110011;
54161 let r = _mm512_knot(a);
54162 let e: u16 = 0b00110011_11001100;
54163 assert_eq!(r, e);
54164 }
54165
54166 #[simd_test(enable = "avx512f")]
54167 unsafe fn test_knot_mask16() {
54168 let a: u16 = 0b11001100_00110011;
54169 let r = _knot_mask16(a);
54170 let e: u16 = 0b00110011_11001100;
54171 assert_eq!(r, e);
54172 }
54173
54174 #[simd_test(enable = "avx512f")]
54175 unsafe fn test_mm512_kandn() {
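        // kandn computes (!a) & b on the 16-bit masks.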
54176 let a: u16 = 0b11001100_00110011;
54177 let b: u16 = 0b00101110_00001011;
54178 let r = _mm512_kandn(a, b);
54179 let e: u16 = 0b00100010_00001000;
54180 assert_eq!(r, e);
54181 }
54182
54183 #[simd_test(enable = "avx512f")]
54184 unsafe fn test_kandn_mask16() {
54185 let a: u16 = 0b11001100_00110011;
54186 let b: u16 = 0b00101110_00001011;
54187 let r = _kandn_mask16(a, b);
54188 let e: u16 = 0b00100010_00001000;
54189 assert_eq!(r, e);
54190 }
54191
54192 #[simd_test(enable = "avx512f")]
54193 unsafe fn test_mm512_kxnor() {
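        // kxnor computes !(a ^ b) on the 16-bit masks.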
54194 let a: u16 = 0b11001100_00110011;
54195 let b: u16 = 0b00101110_00001011;
54196 let r = _mm512_kxnor(a, b);
54197 let e: u16 = 0b00011101_11000111;
54198 assert_eq!(r, e);
54199 }
54200
54201 #[simd_test(enable = "avx512f")]
54202 unsafe fn test_kxnor_mask16() {
54203 let a: u16 = 0b11001100_00110011;
54204 let b: u16 = 0b00101110_00001011;
54205 let r = _kxnor_mask16(a, b);
54206 let e: u16 = 0b00011101_11000111;
54207 assert_eq!(r, e);
54208 }
54209
54210 #[simd_test(enable = "avx512f")]
54211 unsafe fn test_mm512_kmov() {
54212 let a: u16 = 0b11001100_00110011;
54213 let r = _mm512_kmov(a);
54214 let e: u16 = 0b11001100_00110011;
54215 assert_eq!(r, e);
54216 }
54217
54218 #[simd_test(enable = "avx512f")]
54219 unsafe fn test_mm512_int2mask() {
54220 let a: i32 = 0b11001100_00110011;
54221 let r = _mm512_int2mask(a);
54222 let e: u16 = 0b11001100_00110011;
54223 assert_eq!(r, e);
54224 }
54225
54226 #[simd_test(enable = "avx512f")]
54227 unsafe fn test_mm512_mask2int() {
54228 let k1: __mmask16 = 0b11001100_00110011;
54229 let r = _mm512_mask2int(k1);
54230 let e: i32 = 0b11001100_00110011;
54231 assert_eq!(r, e);
54232 }
54233
54234 #[simd_test(enable = "avx512f")]
54235 unsafe fn test_mm512_kunpackb() {
54236 let a: u16 = 0b11001100_00110011;
54237 let b: u16 = 0b00101110_00001011;
54238 let r = _mm512_kunpackb(a, b);
54239 let e: u16 = 0b00101110_00110011;
54240 assert_eq!(r, e);
54241 }
54242
54243 #[simd_test(enable = "avx512f")]
54244 unsafe fn test_mm512_kortestc() {
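        // kortestc returns 1 only when the OR of the two masks is all ones, and 0 otherwise.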
54245 let a: u16 = 0b11001100_00110011;
54246 let b: u16 = 0b00101110_00001011;
54247 let r = _mm512_kortestc(a, b);
54248 assert_eq!(r, 0);
54249 let b: u16 = 0b11111111_11111111;
54250 let r = _mm512_kortestc(a, b);
54251 assert_eq!(r, 1);
54252 }
54253
54254 #[simd_test(enable = "avx512f")]
54255 unsafe fn test_mm512_test_epi32_mask() {
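        // Each mask bit is set when the per-lane AND of a and b is non-zero.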
54256 let a = _mm512_set1_epi32(1 << 0);
54257 let b = _mm512_set1_epi32(1 << 0 | 1 << 1);
54258 let r = _mm512_test_epi32_mask(a, b);
54259 let e: __mmask16 = 0b11111111_11111111;
54260 assert_eq!(r, e);
54261 }
54262
54263 #[simd_test(enable = "avx512f")]
54264 unsafe fn test_mm512_mask_test_epi32_mask() {
54265 let a = _mm512_set1_epi32(1 << 0);
54266 let b = _mm512_set1_epi32(1 << 0 | 1 << 1);
54267 let r = _mm512_mask_test_epi32_mask(0, a, b);
54268 assert_eq!(r, 0);
54269 let r = _mm512_mask_test_epi32_mask(0b11111111_11111111, a, b);
54270 let e: __mmask16 = 0b11111111_11111111;
54271 assert_eq!(r, e);
54272 }
54273
54274 #[simd_test(enable = "avx512f,avx512vl")]
54275 unsafe fn test_mm256_test_epi32_mask() {
54276 let a = _mm256_set1_epi32(1 << 0);
54277 let b = _mm256_set1_epi32(1 << 0 | 1 << 1);
54278 let r = _mm256_test_epi32_mask(a, b);
54279 let e: __mmask8 = 0b11111111;
54280 assert_eq!(r, e);
54281 }
54282
54283 #[simd_test(enable = "avx512f,avx512vl")]
54284 unsafe fn test_mm256_mask_test_epi32_mask() {
54285 let a = _mm256_set1_epi32(1 << 0);
54286 let b = _mm256_set1_epi32(1 << 0 | 1 << 1);
54287 let r = _mm256_mask_test_epi32_mask(0, a, b);
54288 assert_eq!(r, 0);
54289 let r = _mm256_mask_test_epi32_mask(0b11111111, a, b);
54290 let e: __mmask8 = 0b11111111;
54291 assert_eq!(r, e);
54292 }
54293
54294 #[simd_test(enable = "avx512f,avx512vl")]
54295 unsafe fn test_mm_test_epi32_mask() {
54296 let a = _mm_set1_epi32(1 << 0);
54297 let b = _mm_set1_epi32(1 << 0 | 1 << 1);
54298 let r = _mm_test_epi32_mask(a, b);
54299 let e: __mmask8 = 0b00001111;
54300 assert_eq!(r, e);
54301 }
54302
54303 #[simd_test(enable = "avx512f,avx512vl")]
54304 unsafe fn test_mm_mask_test_epi32_mask() {
54305 let a = _mm_set1_epi32(1 << 0);
54306 let b = _mm_set1_epi32(1 << 0 | 1 << 1);
54307 let r = _mm_mask_test_epi32_mask(0, a, b);
54308 assert_eq!(r, 0);
54309 let r = _mm_mask_test_epi32_mask(0b11111111, a, b);
54310 let e: __mmask8 = 0b00001111;
54311 assert_eq!(r, e);
54312 }
54313
54314 #[simd_test(enable = "avx512f")]
54315 unsafe fn test_mm512_testn_epi32_mask() {
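        // testn is the complement of test: each mask bit is set when the per-lane AND of a and b is zero.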
54316 let a = _mm512_set1_epi32(1 << 0);
54317 let b = _mm512_set1_epi32(1 << 0 | 1 << 1);
54318 let r = _mm512_testn_epi32_mask(a, b);
54319 let e: __mmask16 = 0b00000000_00000000;
54320 assert_eq!(r, e);
54321 }
54322
54323 #[simd_test(enable = "avx512f")]
54324 unsafe fn test_mm512_mask_testn_epi32_mask() {
54325 let a = _mm512_set1_epi32(1 << 0);
54326 let b = _mm512_set1_epi32(1 << 1);
        let r = _mm512_mask_testn_epi32_mask(0, a, b);
54328 assert_eq!(r, 0);
54329 let r = _mm512_mask_testn_epi32_mask(0b11111111_11111111, a, b);
54330 let e: __mmask16 = 0b11111111_11111111;
54331 assert_eq!(r, e);
54332 }
54333
54334 #[simd_test(enable = "avx512f,avx512vl")]
54335 unsafe fn test_mm256_testn_epi32_mask() {
54336 let a = _mm256_set1_epi32(1 << 0);
54337 let b = _mm256_set1_epi32(1 << 1);
54338 let r = _mm256_testn_epi32_mask(a, b);
54339 let e: __mmask8 = 0b11111111;
54340 assert_eq!(r, e);
54341 }
54342
54343 #[simd_test(enable = "avx512f,avx512vl")]
54344 unsafe fn test_mm256_mask_testn_epi32_mask() {
54345 let a = _mm256_set1_epi32(1 << 0);
54346 let b = _mm256_set1_epi32(1 << 1);
        let r = _mm256_mask_testn_epi32_mask(0, a, b);
54348 assert_eq!(r, 0);
54349 let r = _mm256_mask_testn_epi32_mask(0b11111111, a, b);
54350 let e: __mmask8 = 0b11111111;
54351 assert_eq!(r, e);
54352 }
54353
54354 #[simd_test(enable = "avx512f,avx512vl")]
54355 unsafe fn test_mm_testn_epi32_mask() {
54356 let a = _mm_set1_epi32(1 << 0);
54357 let b = _mm_set1_epi32(1 << 1);
54358 let r = _mm_testn_epi32_mask(a, b);
54359 let e: __mmask8 = 0b00001111;
54360 assert_eq!(r, e);
54361 }
54362
54363 #[simd_test(enable = "avx512f,avx512vl")]
54364 unsafe fn test_mm_mask_testn_epi32_mask() {
54365 let a = _mm_set1_epi32(1 << 0);
54366 let b = _mm_set1_epi32(1 << 1);
        let r = _mm_mask_testn_epi32_mask(0, a, b);
54368 assert_eq!(r, 0);
54369 let r = _mm_mask_testn_epi32_mask(0b11111111, a, b);
54370 let e: __mmask8 = 0b00001111;
54371 assert_eq!(r, e);
54372 }
54373
54374 #[simd_test(enable = "avx512f")]
54375 unsafe fn test_mm512_stream_ps() {
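        // _mm512_stream_ps is a non-temporal 64-byte store, so the destination must be 64-byte aligned.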
        #[repr(align(64))]
54377 struct Memory {
54378 pub data: [f32; 16],
54379 }
54380 let a = _mm512_set1_ps(7.0);
54381 let mut mem = Memory { data: [-1.0; 16] };
54382
54383 _mm512_stream_ps(&mut mem.data[0] as *mut f32, a);
54384 for i in 0..16 {
54385 assert_eq!(mem.data[i], get_m512(a, i));
54386 }
54387 }
54388
54389 #[simd_test(enable = "avx512f")]
54390 unsafe fn test_mm512_reduce_add_epi32() {
54391 let a = _mm512_set1_epi32(1);
54392 let e: i32 = _mm512_reduce_add_epi32(a);
54393 assert_eq!(16, e);
54394 }
54395
54396 #[simd_test(enable = "avx512f")]
54397 unsafe fn test_mm512_mask_reduce_add_epi32() {
54398 let a = _mm512_set1_epi32(1);
54399 let e: i32 = _mm512_mask_reduce_add_epi32(0b11111111_00000000, a);
54400 assert_eq!(8, e);
54401 }
54402
54403 #[simd_test(enable = "avx512f")]
54404 unsafe fn test_mm512_reduce_add_ps() {
54405 let a = _mm512_set1_ps(1.);
54406 let e: f32 = _mm512_reduce_add_ps(a);
54407 assert_eq!(16., e);
54408 }
54409
54410 #[simd_test(enable = "avx512f")]
54411 unsafe fn test_mm512_mask_reduce_add_ps() {
54412 let a = _mm512_set1_ps(1.);
54413 let e: f32 = _mm512_mask_reduce_add_ps(0b11111111_00000000, a);
54414 assert_eq!(8., e);
54415 }
54416
54417 #[simd_test(enable = "avx512f")]
54418 unsafe fn test_mm512_reduce_mul_epi32() {
54419 let a = _mm512_set1_epi32(2);
54420 let e: i32 = _mm512_reduce_mul_epi32(a);
54421 assert_eq!(65536, e);
54422 }
54423
54424 #[simd_test(enable = "avx512f")]
54425 unsafe fn test_mm512_mask_reduce_mul_epi32() {
54426 let a = _mm512_set1_epi32(2);
54427 let e: i32 = _mm512_mask_reduce_mul_epi32(0b11111111_00000000, a);
54428 assert_eq!(256, e);
54429 }
54430
54431 #[simd_test(enable = "avx512f")]
54432 unsafe fn test_mm512_reduce_mul_ps() {
54433 let a = _mm512_set1_ps(2.);
54434 let e: f32 = _mm512_reduce_mul_ps(a);
54435 assert_eq!(65536., e);
54436 }
54437
54438 #[simd_test(enable = "avx512f")]
54439 unsafe fn test_mm512_mask_reduce_mul_ps() {
54440 let a = _mm512_set1_ps(2.);
54441 let e: f32 = _mm512_mask_reduce_mul_ps(0b11111111_00000000, a);
54442 assert_eq!(256., e);
54443 }
54444
54445 #[simd_test(enable = "avx512f")]
54446 unsafe fn test_mm512_reduce_max_epi32() {
54447 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
54448 let e: i32 = _mm512_reduce_max_epi32(a);
54449 assert_eq!(15, e);
54450 }
54451
54452 #[simd_test(enable = "avx512f")]
54453 unsafe fn test_mm512_mask_reduce_max_epi32() {
54454 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
54455 let e: i32 = _mm512_mask_reduce_max_epi32(0b11111111_00000000, a);
54456 assert_eq!(7, e);
54457 }
54458
54459 #[simd_test(enable = "avx512f")]
54460 unsafe fn test_mm512_reduce_max_epu32() {
54461 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
54462 let e: u32 = _mm512_reduce_max_epu32(a);
54463 assert_eq!(15, e);
54464 }
54465
54466 #[simd_test(enable = "avx512f")]
54467 unsafe fn test_mm512_mask_reduce_max_epu32() {
54468 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
54469 let e: u32 = _mm512_mask_reduce_max_epu32(0b11111111_00000000, a);
54470 assert_eq!(7, e);
54471 }
54472
54473 #[simd_test(enable = "avx512f")]
54474 unsafe fn test_mm512_reduce_max_ps() {
54475 let a = _mm512_set_ps(
54476 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
54477 );
54478 let e: f32 = _mm512_reduce_max_ps(a);
54479 assert_eq!(15., e);
54480 }
54481
54482 #[simd_test(enable = "avx512f")]
54483 unsafe fn test_mm512_mask_reduce_max_ps() {
54484 let a = _mm512_set_ps(
54485 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
54486 );
54487 let e: f32 = _mm512_mask_reduce_max_ps(0b11111111_00000000, a);
54488 assert_eq!(7., e);
54489 }
54490
54491 #[simd_test(enable = "avx512f")]
54492 unsafe fn test_mm512_reduce_min_epi32() {
54493 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
54494 let e: i32 = _mm512_reduce_min_epi32(a);
54495 assert_eq!(0, e);
54496 }
54497
54498 #[simd_test(enable = "avx512f")]
54499 unsafe fn test_mm512_mask_reduce_min_epi32() {
54500 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
54501 let e: i32 = _mm512_mask_reduce_min_epi32(0b11111111_00000000, a);
54502 assert_eq!(0, e);
54503 }
54504
54505 #[simd_test(enable = "avx512f")]
54506 unsafe fn test_mm512_reduce_min_epu32() {
54507 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
54508 let e: u32 = _mm512_reduce_min_epu32(a);
54509 assert_eq!(0, e);
54510 }
54511
54512 #[simd_test(enable = "avx512f")]
54513 unsafe fn test_mm512_mask_reduce_min_epu32() {
54514 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
54515 let e: u32 = _mm512_mask_reduce_min_epu32(0b11111111_00000000, a);
54516 assert_eq!(0, e);
54517 }
54518
54519 #[simd_test(enable = "avx512f")]
54520 unsafe fn test_mm512_reduce_min_ps() {
54521 let a = _mm512_set_ps(
54522 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
54523 );
54524 let e: f32 = _mm512_reduce_min_ps(a);
54525 assert_eq!(0., e);
54526 }
54527
54528 #[simd_test(enable = "avx512f")]
54529 unsafe fn test_mm512_mask_reduce_min_ps() {
54530 let a = _mm512_set_ps(
54531 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
54532 );
54533 let e: f32 = _mm512_mask_reduce_min_ps(0b11111111_00000000, a);
54534 assert_eq!(0., e);
54535 }
54536
54537 #[simd_test(enable = "avx512f")]
54538 unsafe fn test_mm512_reduce_and_epi32() {
54539 let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
54540 let e: i32 = _mm512_reduce_and_epi32(a);
54541 assert_eq!(0, e);
54542 }
54543
54544 #[simd_test(enable = "avx512f")]
54545 unsafe fn test_mm512_mask_reduce_and_epi32() {
54546 let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
54547 let e: i32 = _mm512_mask_reduce_and_epi32(0b11111111_00000000, a);
54548 assert_eq!(1, e);
54549 }
54550
54551 #[simd_test(enable = "avx512f")]
54552 unsafe fn test_mm512_reduce_or_epi32() {
54553 let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
54554 let e: i32 = _mm512_reduce_or_epi32(a);
54555 assert_eq!(3, e);
54556 }
54557
54558 #[simd_test(enable = "avx512f")]
54559 unsafe fn test_mm512_mask_reduce_or_epi32() {
54560 let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e: i32 = _mm512_mask_reduce_or_epi32(0b11111111_00000000, a);
54562 assert_eq!(1, e);
54563 }
54564
54565 #[simd_test(enable = "avx512f")]
54566 unsafe fn test_mm512_mask_compress_epi32() {
54567 let src = _mm512_set1_epi32(200);
54568 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
54569 let r = _mm512_mask_compress_epi32(src, 0, a);
54570 assert_eq_m512i(r, src);
54571 let r = _mm512_mask_compress_epi32(src, 0b01010101_01010101, a);
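        // Compress packs the mask-selected (even-indexed) elements into the low lanes in index order;
        // the remaining high lanes are copied from src.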
54572 let e = _mm512_set_epi32(
54573 200, 200, 200, 200, 200, 200, 200, 200, 1, 3, 5, 7, 9, 11, 13, 15,
54574 );
54575 assert_eq_m512i(r, e);
54576 }
54577
54578 #[simd_test(enable = "avx512f")]
54579 unsafe fn test_mm512_maskz_compress_epi32() {
54580 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
54581 let r = _mm512_maskz_compress_epi32(0, a);
54582 assert_eq_m512i(r, _mm512_setzero_si512());
54583 let r = _mm512_maskz_compress_epi32(0b01010101_01010101, a);
54584 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 5, 7, 9, 11, 13, 15);
54585 assert_eq_m512i(r, e);
54586 }
54587
54588 #[simd_test(enable = "avx512f,avx512vl")]
54589 unsafe fn test_mm256_mask_compress_epi32() {
54590 let src = _mm256_set1_epi32(200);
54591 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
54592 let r = _mm256_mask_compress_epi32(src, 0, a);
54593 assert_eq_m256i(r, src);
54594 let r = _mm256_mask_compress_epi32(src, 0b01010101, a);
54595 let e = _mm256_set_epi32(200, 200, 200, 200, 1, 3, 5, 7);
54596 assert_eq_m256i(r, e);
54597 }
54598
54599 #[simd_test(enable = "avx512f,avx512vl")]
54600 unsafe fn test_mm256_maskz_compress_epi32() {
54601 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
54602 let r = _mm256_maskz_compress_epi32(0, a);
54603 assert_eq_m256i(r, _mm256_setzero_si256());
54604 let r = _mm256_maskz_compress_epi32(0b01010101, a);
54605 let e = _mm256_set_epi32(0, 0, 0, 0, 1, 3, 5, 7);
54606 assert_eq_m256i(r, e);
54607 }
54608
54609 #[simd_test(enable = "avx512f,avx512vl")]
54610 unsafe fn test_mm_mask_compress_epi32() {
54611 let src = _mm_set1_epi32(200);
54612 let a = _mm_set_epi32(0, 1, 2, 3);
54613 let r = _mm_mask_compress_epi32(src, 0, a);
54614 assert_eq_m128i(r, src);
54615 let r = _mm_mask_compress_epi32(src, 0b00000101, a);
54616 let e = _mm_set_epi32(200, 200, 1, 3);
54617 assert_eq_m128i(r, e);
54618 }
54619
54620 #[simd_test(enable = "avx512f,avx512vl")]
54621 unsafe fn test_mm_maskz_compress_epi32() {
54622 let a = _mm_set_epi32(0, 1, 2, 3);
54623 let r = _mm_maskz_compress_epi32(0, a);
54624 assert_eq_m128i(r, _mm_setzero_si128());
54625 let r = _mm_maskz_compress_epi32(0b00000101, a);
54626 let e = _mm_set_epi32(0, 0, 1, 3);
54627 assert_eq_m128i(r, e);
54628 }
54629
54630 #[simd_test(enable = "avx512f")]
54631 unsafe fn test_mm512_mask_compress_ps() {
54632 let src = _mm512_set1_ps(200.);
54633 let a = _mm512_set_ps(
54634 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
54635 );
54636 let r = _mm512_mask_compress_ps(src, 0, a);
54637 assert_eq_m512(r, src);
54638 let r = _mm512_mask_compress_ps(src, 0b01010101_01010101, a);
54639 let e = _mm512_set_ps(
54640 200., 200., 200., 200., 200., 200., 200., 200., 1., 3., 5., 7., 9., 11., 13., 15.,
54641 );
54642 assert_eq_m512(r, e);
54643 }
54644
54645 #[simd_test(enable = "avx512f")]
54646 unsafe fn test_mm512_maskz_compress_ps() {
54647 let a = _mm512_set_ps(
54648 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
54649 );
54650 let r = _mm512_maskz_compress_ps(0, a);
54651 assert_eq_m512(r, _mm512_setzero_ps());
54652 let r = _mm512_maskz_compress_ps(0b01010101_01010101, a);
54653 let e = _mm512_set_ps(
54654 0., 0., 0., 0., 0., 0., 0., 0., 1., 3., 5., 7., 9., 11., 13., 15.,
54655 );
54656 assert_eq_m512(r, e);
54657 }
54658
54659 #[simd_test(enable = "avx512f,avx512vl")]
54660 unsafe fn test_mm256_mask_compress_ps() {
54661 let src = _mm256_set1_ps(200.);
54662 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
54663 let r = _mm256_mask_compress_ps(src, 0, a);
54664 assert_eq_m256(r, src);
54665 let r = _mm256_mask_compress_ps(src, 0b01010101, a);
54666 let e = _mm256_set_ps(200., 200., 200., 200., 1., 3., 5., 7.);
54667 assert_eq_m256(r, e);
54668 }
54669
54670 #[simd_test(enable = "avx512f,avx512vl")]
54671 unsafe fn test_mm256_maskz_compress_ps() {
54672 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
54673 let r = _mm256_maskz_compress_ps(0, a);
54674 assert_eq_m256(r, _mm256_setzero_ps());
54675 let r = _mm256_maskz_compress_ps(0b01010101, a);
54676 let e = _mm256_set_ps(0., 0., 0., 0., 1., 3., 5., 7.);
54677 assert_eq_m256(r, e);
54678 }
54679
54680 #[simd_test(enable = "avx512f,avx512vl")]
54681 unsafe fn test_mm_mask_compress_ps() {
54682 let src = _mm_set1_ps(200.);
54683 let a = _mm_set_ps(0., 1., 2., 3.);
54684 let r = _mm_mask_compress_ps(src, 0, a);
54685 assert_eq_m128(r, src);
54686 let r = _mm_mask_compress_ps(src, 0b00000101, a);
54687 let e = _mm_set_ps(200., 200., 1., 3.);
54688 assert_eq_m128(r, e);
54689 }
54690
54691 #[simd_test(enable = "avx512f,avx512vl")]
54692 unsafe fn test_mm_maskz_compress_ps() {
54693 let a = _mm_set_ps(0., 1., 2., 3.);
54694 let r = _mm_maskz_compress_ps(0, a);
54695 assert_eq_m128(r, _mm_setzero_ps());
54696 let r = _mm_maskz_compress_ps(0b00000101, a);
54697 let e = _mm_set_ps(0., 0., 1., 3.);
54698 assert_eq_m128(r, e);
54699 }
54700
54701 #[simd_test(enable = "avx512f")]
54702 unsafe fn test_mm512_mask_compressstoreu_epi32() {
54703 let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54704 let mut r = [0_i32; 16];
54705 _mm512_mask_compressstoreu_epi32(r.as_mut_ptr() as *mut _, 0, a);
54706 assert_eq!(&r, &[0_i32; 16]);
54707 _mm512_mask_compressstoreu_epi32(r.as_mut_ptr() as *mut _, 0b1111000011001010, a);
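        // Mask 0b1111000011001010 selects elements 1, 3, 6, 7, 12, 13, 14, 15 (values 2, 4, 7, 8, 13, 14, 15, 16);
        // they are stored contiguously starting at the pointer, and the rest of r keeps its zeros.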
54708 assert_eq!(&r, &[2, 4, 7, 8, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0]);
54709 }
54710
54711 #[simd_test(enable = "avx512f,avx512vl")]
54712 unsafe fn test_mm256_mask_compressstoreu_epi32() {
54713 let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
54714 let mut r = [0_i32; 8];
54715 _mm256_mask_compressstoreu_epi32(r.as_mut_ptr() as *mut _, 0, a);
54716 assert_eq!(&r, &[0_i32; 8]);
54717 _mm256_mask_compressstoreu_epi32(r.as_mut_ptr() as *mut _, 0b11001010, a);
54718 assert_eq!(&r, &[2, 4, 7, 8, 0, 0, 0, 0]);
54719 }
54720
54721 #[simd_test(enable = "avx512f,avx512vl")]
54722 unsafe fn test_mm_mask_compressstoreu_epi32() {
54723 let a = _mm_setr_epi32(1, 2, 3, 4);
54724 let mut r = [0_i32; 4];
54725 _mm_mask_compressstoreu_epi32(r.as_mut_ptr() as *mut _, 0, a);
54726 assert_eq!(&r, &[0_i32; 4]);
54727 _mm_mask_compressstoreu_epi32(r.as_mut_ptr() as *mut _, 0b1011, a);
54728 assert_eq!(&r, &[1, 2, 4, 0]);
54729 }
54730
54731 #[simd_test(enable = "avx512f")]
54732 unsafe fn test_mm512_mask_compressstoreu_epi64() {
54733 let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
54734 let mut r = [0_i64; 8];
54735 _mm512_mask_compressstoreu_epi64(r.as_mut_ptr() as *mut _, 0, a);
54736 assert_eq!(&r, &[0_i64; 8]);
54737 _mm512_mask_compressstoreu_epi64(r.as_mut_ptr() as *mut _, 0b11001010, a);
54738 assert_eq!(&r, &[2, 4, 7, 8, 0, 0, 0, 0]);
54739 }
54740
54741 #[simd_test(enable = "avx512f,avx512vl")]
54742 unsafe fn test_mm256_mask_compressstoreu_epi64() {
54743 let a = _mm256_setr_epi64x(1, 2, 3, 4);
54744 let mut r = [0_i64; 4];
54745 _mm256_mask_compressstoreu_epi64(r.as_mut_ptr() as *mut _, 0, a);
54746 assert_eq!(&r, &[0_i64; 4]);
54747 _mm256_mask_compressstoreu_epi64(r.as_mut_ptr() as *mut _, 0b1011, a);
54748 assert_eq!(&r, &[1, 2, 4, 0]);
54749 }
54750
54751 #[simd_test(enable = "avx512f,avx512vl")]
54752 unsafe fn test_mm_mask_compressstoreu_epi64() {
54753 let a = _mm_setr_epi64x(1, 2);
54754 let mut r = [0_i64; 2];
54755 _mm_mask_compressstoreu_epi64(r.as_mut_ptr() as *mut _, 0, a);
54756 assert_eq!(&r, &[0_i64; 2]);
54757 _mm_mask_compressstoreu_epi64(r.as_mut_ptr() as *mut _, 0b10, a);
54758 assert_eq!(&r, &[2, 0]);
54759 }
54760
54761 #[simd_test(enable = "avx512f")]
54762 unsafe fn test_mm512_mask_compressstoreu_ps() {
54763 let a = _mm512_setr_ps(
54764 1_f32, 2_f32, 3_f32, 4_f32, 5_f32, 6_f32, 7_f32, 8_f32, 9_f32, 10_f32, 11_f32, 12_f32,
54765 13_f32, 14_f32, 15_f32, 16_f32,
54766 );
54767 let mut r = [0_f32; 16];
54768 _mm512_mask_compressstoreu_ps(r.as_mut_ptr() as *mut _, 0, a);
54769 assert_eq!(&r, &[0_f32; 16]);
54770 _mm512_mask_compressstoreu_ps(r.as_mut_ptr() as *mut _, 0b1111000011001010, a);
54771 assert_eq!(
54772 &r,
54773 &[
54774 2_f32, 4_f32, 7_f32, 8_f32, 13_f32, 14_f32, 15_f32, 16_f32, 0_f32, 0_f32, 0_f32,
54775 0_f32, 0_f32, 0_f32, 0_f32, 0_f32
54776 ]
54777 );
54778 }
54779
54780 #[simd_test(enable = "avx512f,avx512vl")]
54781 unsafe fn test_mm256_mask_compressstoreu_ps() {
54782 let a = _mm256_setr_ps(1_f32, 2_f32, 3_f32, 4_f32, 5_f32, 6_f32, 7_f32, 8_f32);
54783 let mut r = [0_f32; 8];
54784 _mm256_mask_compressstoreu_ps(r.as_mut_ptr() as *mut _, 0, a);
54785 assert_eq!(&r, &[0_f32; 8]);
54786 _mm256_mask_compressstoreu_ps(r.as_mut_ptr() as *mut _, 0b11001010, a);
54787 assert_eq!(
54788 &r,
54789 &[2_f32, 4_f32, 7_f32, 8_f32, 0_f32, 0_f32, 0_f32, 0_f32]
54790 );
54791 }
54792
54793 #[simd_test(enable = "avx512f,avx512vl")]
54794 unsafe fn test_mm_mask_compressstoreu_ps() {
54795 let a = _mm_setr_ps(1_f32, 2_f32, 3_f32, 4_f32);
54796 let mut r = [0.; 4];
54797 _mm_mask_compressstoreu_ps(r.as_mut_ptr() as *mut _, 0, a);
54798 assert_eq!(&r, &[0.; 4]);
54799 _mm_mask_compressstoreu_ps(r.as_mut_ptr() as *mut _, 0b1011, a);
54800 assert_eq!(&r, &[1_f32, 2_f32, 4_f32, 0_f32]);
54801 }
54802
54803 #[simd_test(enable = "avx512f")]
54804 unsafe fn test_mm512_mask_compressstoreu_pd() {
54805 let a = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.);
54806 let mut r = [0.; 8];
54807 _mm512_mask_compressstoreu_pd(r.as_mut_ptr() as *mut _, 0, a);
54808 assert_eq!(&r, &[0.; 8]);
54809 _mm512_mask_compressstoreu_pd(r.as_mut_ptr() as *mut _, 0b11001010, a);
54810 assert_eq!(&r, &[2., 4., 7., 8., 0., 0., 0., 0.]);
54811 }
54812
54813 #[simd_test(enable = "avx512f,avx512vl")]
54814 unsafe fn test_mm256_mask_compressstoreu_pd() {
54815 let a = _mm256_setr_pd(1., 2., 3., 4.);
54816 let mut r = [0.; 4];
54817 _mm256_mask_compressstoreu_pd(r.as_mut_ptr() as *mut _, 0, a);
54818 assert_eq!(&r, &[0.; 4]);
54819 _mm256_mask_compressstoreu_pd(r.as_mut_ptr() as *mut _, 0b1011, a);
54820 assert_eq!(&r, &[1., 2., 4., 0.]);
54821 }
54822
54823 #[simd_test(enable = "avx512f,avx512vl")]
54824 unsafe fn test_mm_mask_compressstoreu_pd() {
54825 let a = _mm_setr_pd(1., 2.);
54826 let mut r = [0.; 2];
54827 _mm_mask_compressstoreu_pd(r.as_mut_ptr() as *mut _, 0, a);
54828 assert_eq!(&r, &[0.; 2]);
54829 _mm_mask_compressstoreu_pd(r.as_mut_ptr() as *mut _, 0b10, a);
54830 assert_eq!(&r, &[2., 0.]);
54831 }
54832
54833 #[simd_test(enable = "avx512f")]
54834 unsafe fn test_mm512_mask_expand_epi32() {
54835 let src = _mm512_set1_epi32(200);
54836 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
54837 let r = _mm512_mask_expand_epi32(src, 0, a);
54838 assert_eq_m512i(r, src);
54839 let r = _mm512_mask_expand_epi32(src, 0b01010101_01010101, a);
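        // Expand distributes the low elements of a into the mask-selected lanes in order; unselected lanes keep src.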
54840 let e = _mm512_set_epi32(
54841 200, 8, 200, 9, 200, 10, 200, 11, 200, 12, 200, 13, 200, 14, 200, 15,
54842 );
54843 assert_eq_m512i(r, e);
54844 }
54845
54846 #[simd_test(enable = "avx512f")]
54847 unsafe fn test_mm512_maskz_expand_epi32() {
54848 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
54849 let r = _mm512_maskz_expand_epi32(0, a);
54850 assert_eq_m512i(r, _mm512_setzero_si512());
54851 let r = _mm512_maskz_expand_epi32(0b01010101_01010101, a);
54852 let e = _mm512_set_epi32(0, 8, 0, 9, 0, 10, 0, 11, 0, 12, 0, 13, 0, 14, 0, 15);
54853 assert_eq_m512i(r, e);
54854 }
54855
54856 #[simd_test(enable = "avx512f,avx512vl")]
54857 unsafe fn test_mm256_mask_expand_epi32() {
54858 let src = _mm256_set1_epi32(200);
54859 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
54860 let r = _mm256_mask_expand_epi32(src, 0, a);
54861 assert_eq_m256i(r, src);
54862 let r = _mm256_mask_expand_epi32(src, 0b01010101, a);
54863 let e = _mm256_set_epi32(200, 4, 200, 5, 200, 6, 200, 7);
54864 assert_eq_m256i(r, e);
54865 }
54866
54867 #[simd_test(enable = "avx512f,avx512vl")]
54868 unsafe fn test_mm256_maskz_expand_epi32() {
54869 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
54870 let r = _mm256_maskz_expand_epi32(0, a);
54871 assert_eq_m256i(r, _mm256_setzero_si256());
54872 let r = _mm256_maskz_expand_epi32(0b01010101, a);
54873 let e = _mm256_set_epi32(0, 4, 0, 5, 0, 6, 0, 7);
54874 assert_eq_m256i(r, e);
54875 }
54876
54877 #[simd_test(enable = "avx512f,avx512vl")]
54878 unsafe fn test_mm_mask_expand_epi32() {
54879 let src = _mm_set1_epi32(200);
54880 let a = _mm_set_epi32(0, 1, 2, 3);
54881 let r = _mm_mask_expand_epi32(src, 0, a);
54882 assert_eq_m128i(r, src);
54883 let r = _mm_mask_expand_epi32(src, 0b00000101, a);
54884 let e = _mm_set_epi32(200, 2, 200, 3);
54885 assert_eq_m128i(r, e);
54886 }
54887
54888 #[simd_test(enable = "avx512f,avx512vl")]
54889 unsafe fn test_mm_maskz_expand_epi32() {
54890 let a = _mm_set_epi32(0, 1, 2, 3);
54891 let r = _mm_maskz_expand_epi32(0, a);
54892 assert_eq_m128i(r, _mm_setzero_si128());
54893 let r = _mm_maskz_expand_epi32(0b00000101, a);
54894 let e = _mm_set_epi32(0, 2, 0, 3);
54895 assert_eq_m128i(r, e);
54896 }
54897
54898 #[simd_test(enable = "avx512f")]
54899 unsafe fn test_mm512_mask_expand_ps() {
54900 let src = _mm512_set1_ps(200.);
54901 let a = _mm512_set_ps(
54902 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
54903 );
54904 let r = _mm512_mask_expand_ps(src, 0, a);
54905 assert_eq_m512(r, src);
54906 let r = _mm512_mask_expand_ps(src, 0b01010101_01010101, a);
54907 let e = _mm512_set_ps(
54908 200., 8., 200., 9., 200., 10., 200., 11., 200., 12., 200., 13., 200., 14., 200., 15.,
54909 );
54910 assert_eq_m512(r, e);
54911 }
54912
54913 #[simd_test(enable = "avx512f")]
54914 unsafe fn test_mm512_maskz_expand_ps() {
54915 let a = _mm512_set_ps(
54916 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
54917 );
54918 let r = _mm512_maskz_expand_ps(0, a);
54919 assert_eq_m512(r, _mm512_setzero_ps());
54920 let r = _mm512_maskz_expand_ps(0b01010101_01010101, a);
54921 let e = _mm512_set_ps(
54922 0., 8., 0., 9., 0., 10., 0., 11., 0., 12., 0., 13., 0., 14., 0., 15.,
54923 );
54924 assert_eq_m512(r, e);
54925 }
54926
54927 #[simd_test(enable = "avx512f,avx512vl")]
54928 unsafe fn test_mm256_mask_expand_ps() {
54929 let src = _mm256_set1_ps(200.);
54930 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
54931 let r = _mm256_mask_expand_ps(src, 0, a);
54932 assert_eq_m256(r, src);
54933 let r = _mm256_mask_expand_ps(src, 0b01010101, a);
54934 let e = _mm256_set_ps(200., 4., 200., 5., 200., 6., 200., 7.);
54935 assert_eq_m256(r, e);
54936 }
54937
54938 #[simd_test(enable = "avx512f,avx512vl")]
54939 unsafe fn test_mm256_maskz_expand_ps() {
54940 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
54941 let r = _mm256_maskz_expand_ps(0, a);
54942 assert_eq_m256(r, _mm256_setzero_ps());
54943 let r = _mm256_maskz_expand_ps(0b01010101, a);
54944 let e = _mm256_set_ps(0., 4., 0., 5., 0., 6., 0., 7.);
54945 assert_eq_m256(r, e);
54946 }
54947
54948 #[simd_test(enable = "avx512f,avx512vl")]
54949 unsafe fn test_mm_mask_expand_ps() {
54950 let src = _mm_set1_ps(200.);
54951 let a = _mm_set_ps(0., 1., 2., 3.);
54952 let r = _mm_mask_expand_ps(src, 0, a);
54953 assert_eq_m128(r, src);
54954 let r = _mm_mask_expand_ps(src, 0b00000101, a);
54955 let e = _mm_set_ps(200., 2., 200., 3.);
54956 assert_eq_m128(r, e);
54957 }
54958
54959 #[simd_test(enable = "avx512f,avx512vl")]
54960 unsafe fn test_mm_maskz_expand_ps() {
54961 let a = _mm_set_ps(0., 1., 2., 3.);
54962 let r = _mm_maskz_expand_ps(0, a);
54963 assert_eq_m128(r, _mm_setzero_ps());
54964 let r = _mm_maskz_expand_ps(0b00000101, a);
54965 let e = _mm_set_ps(0., 2., 0., 3.);
54966 assert_eq_m128(r, e);
54967 }
54968
54969 #[simd_test(enable = "avx512f")]
54970 unsafe fn test_mm512_loadu_epi32() {
54971 let a = &[4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50];
54972 let p = a.as_ptr();
54973 let r = _mm512_loadu_epi32(black_box(p));
54974 let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
54975 assert_eq_m512i(r, e);
54976 }
54977
54978 #[simd_test(enable = "avx512f,avx512vl")]
54979 unsafe fn test_mm256_loadu_epi32() {
54980 let a = &[4, 3, 2, 5, 8, 9, 64, 50];
54981 let p = a.as_ptr();
54982 let r = _mm256_loadu_epi32(black_box(p));
54983 let e = _mm256_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50);
54984 assert_eq_m256i(r, e);
54985 }
54986
54987 #[simd_test(enable = "avx512f,avx512vl")]
54988 unsafe fn test_mm_loadu_epi32() {
54989 let a = &[4, 3, 2, 5];
54990 let p = a.as_ptr();
54991 let r = _mm_loadu_epi32(black_box(p));
54992 let e = _mm_setr_epi32(4, 3, 2, 5);
54993 assert_eq_m128i(r, e);
54994 }
54995
54996 #[simd_test(enable = "avx512f")]
54997 unsafe fn test_mm512_mask_cvtepi32_storeu_epi16() {
54998 let a = _mm512_set1_epi32(9);
54999 let mut r = _mm256_undefined_si256();
55000 _mm512_mask_cvtepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
55001 let e = _mm256_set1_epi16(9);
55002 assert_eq_m256i(r, e);
55003 }
55004
55005 #[simd_test(enable = "avx512f,avx512vl")]
55006 unsafe fn test_mm256_mask_cvtepi32_storeu_epi16() {
55007 let a = _mm256_set1_epi32(9);
55008 let mut r = _mm_undefined_si128();
55009 _mm256_mask_cvtepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
55010 let e = _mm_set1_epi16(9);
55011 assert_eq_m128i(r, e);
55012 }
55013
55014 #[simd_test(enable = "avx512f,avx512vl")]
55015 unsafe fn test_mm_mask_cvtepi32_storeu_epi16() {
55016 let a = _mm_set1_epi32(9);
55017 let mut r = _mm_set1_epi8(0);
55018 _mm_mask_cvtepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
55019 let e = _mm_set_epi16(0, 0, 0, 0, 9, 9, 9, 9);
55020 assert_eq_m128i(r, e);
55021 }
55022
55023 #[simd_test(enable = "avx512f")]
55024 unsafe fn test_mm512_mask_cvtsepi32_storeu_epi16() {
55025 let a = _mm512_set1_epi32(i32::MAX);
55026 let mut r = _mm256_undefined_si256();
55027 _mm512_mask_cvtsepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
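        // Signed saturation narrows i32::MAX to i16::MAX in every stored lane.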
55028 let e = _mm256_set1_epi16(i16::MAX);
55029 assert_eq_m256i(r, e);
55030 }
55031
55032 #[simd_test(enable = "avx512f,avx512vl")]
55033 unsafe fn test_mm256_mask_cvtsepi32_storeu_epi16() {
55034 let a = _mm256_set1_epi32(i32::MAX);
55035 let mut r = _mm_undefined_si128();
55036 _mm256_mask_cvtsepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
55037 let e = _mm_set1_epi16(i16::MAX);
55038 assert_eq_m128i(r, e);
55039 }
55040
55041 #[simd_test(enable = "avx512f,avx512vl")]
55042 unsafe fn test_mm_mask_cvtsepi32_storeu_epi16() {
55043 let a = _mm_set1_epi32(i32::MAX);
55044 let mut r = _mm_set1_epi8(0);
55045 _mm_mask_cvtsepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
55046 let e = _mm_set_epi16(0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
55047 assert_eq_m128i(r, e);
55048 }
55049
55050 #[simd_test(enable = "avx512f")]
55051 unsafe fn test_mm512_mask_cvtusepi32_storeu_epi16() {
55052 let a = _mm512_set1_epi32(i32::MAX);
55053 let mut r = _mm256_undefined_si256();
55054 _mm512_mask_cvtusepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
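        // Unsigned saturation narrows i32::MAX to u16::MAX; the `u16::MAX as i16` cast below just reinterprets the all-ones bit pattern.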
55055 let e = _mm256_set1_epi16(u16::MAX as i16);
55056 assert_eq_m256i(r, e);
55057 }
55058
55059 #[simd_test(enable = "avx512f,avx512vl")]
55060 unsafe fn test_mm256_mask_cvtusepi32_storeu_epi16() {
55061 let a = _mm256_set1_epi32(i32::MAX);
55062 let mut r = _mm_undefined_si128();
55063 _mm256_mask_cvtusepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
55064 let e = _mm_set1_epi16(u16::MAX as i16);
55065 assert_eq_m128i(r, e);
55066 }
55067
55068 #[simd_test(enable = "avx512f,avx512vl")]
55069 unsafe fn test_mm_mask_cvtusepi32_storeu_epi16() {
55070 let a = _mm_set1_epi32(i32::MAX);
55071 let mut r = _mm_set1_epi8(0);
55072 _mm_mask_cvtusepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
55073 let e = _mm_set_epi16(
55074 0,
55075 0,
55076 0,
55077 0,
55078 u16::MAX as i16,
55079 u16::MAX as i16,
55080 u16::MAX as i16,
55081 u16::MAX as i16,
55082 );
55083 assert_eq_m128i(r, e);
55084 }
55085
55086 #[simd_test(enable = "avx512f")]
55087 unsafe fn test_mm512_mask_cvtepi32_storeu_epi8() {
55088 let a = _mm512_set1_epi32(9);
55089 let mut r = _mm_undefined_si128();
55090 _mm512_mask_cvtepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
55091 let e = _mm_set1_epi8(9);
55092 assert_eq_m128i(r, e);
55093 }
55094
55095 #[simd_test(enable = "avx512f,avx512vl")]
55096 unsafe fn test_mm256_mask_cvtepi32_storeu_epi8() {
55097 let a = _mm256_set1_epi32(9);
55098 let mut r = _mm_set1_epi8(0);
55099 _mm256_mask_cvtepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
55100 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9, 9, 9, 9, 9);
55101 assert_eq_m128i(r, e);
55102 }
55103
55104 #[simd_test(enable = "avx512f,avx512vl")]
55105 unsafe fn test_mm_mask_cvtepi32_storeu_epi8() {
55106 let a = _mm_set1_epi32(9);
55107 let mut r = _mm_set1_epi8(0);
55108 _mm_mask_cvtepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
55109 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9);
55110 assert_eq_m128i(r, e);
55111 }
55112
55113 #[simd_test(enable = "avx512f")]
55114 unsafe fn test_mm512_mask_cvtsepi32_storeu_epi8() {
55115 let a = _mm512_set1_epi32(i32::MAX);
55116 let mut r = _mm_undefined_si128();
55117 _mm512_mask_cvtsepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
55118 let e = _mm_set1_epi8(i8::MAX);
55119 assert_eq_m128i(r, e);
55120 }
55121
55122 #[simd_test(enable = "avx512f,avx512vl")]
55123 unsafe fn test_mm256_mask_cvtsepi32_storeu_epi8() {
55124 let a = _mm256_set1_epi32(i32::MAX);
55125 let mut r = _mm_set1_epi8(0);
55126 _mm256_mask_cvtsepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
55127 #[rustfmt::skip]
55128 let e = _mm_set_epi8(
55129 0, 0, 0, 0,
55130 0, 0, 0, 0,
55131 i8::MAX, i8::MAX, i8::MAX, i8::MAX,
55132 i8::MAX, i8::MAX, i8::MAX, i8::MAX,
55133 );
55134 assert_eq_m128i(r, e);
55135 }
55136
55137 #[simd_test(enable = "avx512f,avx512vl")]
55138 unsafe fn test_mm_mask_cvtsepi32_storeu_epi8() {
55139 let a = _mm_set1_epi32(i32::MAX);
55140 let mut r = _mm_set1_epi8(0);
55141 _mm_mask_cvtsepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
55142 #[rustfmt::skip]
55143 let e = _mm_set_epi8(
55144 0, 0, 0, 0,
55145 0, 0, 0, 0,
55146 0, 0, 0, 0,
55147 i8::MAX, i8::MAX, i8::MAX, i8::MAX,
55148 );
55149 assert_eq_m128i(r, e);
55150 }
55151
55152 #[simd_test(enable = "avx512f")]
55153 unsafe fn test_mm512_mask_cvtusepi32_storeu_epi8() {
55154 let a = _mm512_set1_epi32(i32::MAX);
55155 let mut r = _mm_undefined_si128();
55156 _mm512_mask_cvtusepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
55157 let e = _mm_set1_epi8(u8::MAX as i8);
55158 assert_eq_m128i(r, e);
55159 }
55160
55161 #[simd_test(enable = "avx512f,avx512vl")]
55162 unsafe fn test_mm256_mask_cvtusepi32_storeu_epi8() {
55163 let a = _mm256_set1_epi32(i32::MAX);
55164 let mut r = _mm_set1_epi8(0);
55165 _mm256_mask_cvtusepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
55166 #[rustfmt::skip]
55167 let e = _mm_set_epi8(
55168 0, 0, 0, 0,
55169 0, 0, 0, 0,
55170 u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
55171 u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
55172 );
55173 assert_eq_m128i(r, e);
55174 }
55175
55176 #[simd_test(enable = "avx512f,avx512vl")]
55177 unsafe fn test_mm_mask_cvtusepi32_storeu_epi8() {
55178 let a = _mm_set1_epi32(i32::MAX);
55179 let mut r = _mm_set1_epi8(0);
55180 _mm_mask_cvtusepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
55181 #[rustfmt::skip]
55182 let e = _mm_set_epi8(
55183 0, 0, 0, 0,
55184 0, 0, 0, 0,
55185 0, 0, 0, 0,
55186 u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
55187 );
55188 assert_eq_m128i(r, e);
55189 }
55190
55191 #[simd_test(enable = "avx512f")]
55192 unsafe fn test_mm512_storeu_epi32() {
55193 let a = _mm512_set1_epi32(9);
55194 let mut r = _mm512_undefined_epi32();
55195 _mm512_storeu_epi32(&mut r as *mut _ as *mut i32, a);
55196 assert_eq_m512i(r, a);
55197 }
55198
55199 #[simd_test(enable = "avx512f,avx512vl")]
55200 unsafe fn test_mm256_storeu_epi32() {
55201 let a = _mm256_set1_epi32(9);
55202 let mut r = _mm256_undefined_si256();
55203 _mm256_storeu_epi32(&mut r as *mut _ as *mut i32, a);
55204 assert_eq_m256i(r, a);
55205 }
55206
55207 #[simd_test(enable = "avx512f,avx512vl")]
55208 unsafe fn test_mm_storeu_epi32() {
55209 let a = _mm_set1_epi32(9);
55210 let mut r = _mm_undefined_si128();
55211 _mm_storeu_epi32(&mut r as *mut _ as *mut i32, a);
55212 assert_eq_m128i(r, a);
55213 }
55214
55215 #[simd_test(enable = "avx512f")]
55216 unsafe fn test_mm512_loadu_si512() {
55217 let a = &[4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50];
55218 let p = a.as_ptr();
55219 let r = _mm512_loadu_si512(black_box(p));
55220 let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
55221 assert_eq_m512i(r, e);
55222 }
55223
55224 #[simd_test(enable = "avx512f")]
55225 unsafe fn test_mm512_storeu_si512() {
55226 let a = _mm512_set1_epi32(9);
55227 let mut r = _mm512_undefined_epi32();
55228 _mm512_storeu_si512(&mut r as *mut _ as *mut i32, a);
55229 assert_eq_m512i(r, a);
55230 }
55231
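// Note (added for clarity): the aligned load tests wrap their data in
// #[repr(align(64))] structs because _mm512_load_* / _mm512_store_* require
// 64-byte aligned pointers, unlike the loadu/storeu variants above.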
55232 #[simd_test(enable = "avx512f")]
55233 unsafe fn test_mm512_load_si512() {
55234 #[repr(align(64))]
55235 struct Align {
55236 data: [i32; 16], // 64 bytes
55237 }
55238 let a = Align {
55239 data: [4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50],
55240 };
55241 let p = (a.data).as_ptr();
55242 let r = _mm512_load_si512(black_box(p));
55243 let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
55244 assert_eq_m512i(r, e);
55245 }
55246
55247 #[simd_test(enable = "avx512f")]
55248 unsafe fn test_mm512_store_si512() {
55249 let a = _mm512_set1_epi32(9);
55250 let mut r = _mm512_undefined_epi32();
55251 _mm512_store_si512(&mut r as *mut _ as *mut i32, a);
55252 assert_eq_m512i(r, a);
55253 }
55254
55255 #[simd_test(enable = "avx512f")]
55256 unsafe fn test_mm512_load_epi32() {
55257 #[repr(align(64))]
55258 struct Align {
55259 data: [i32; 16], // 64 bytes
55260 }
55261 let a = Align {
55262 data: [4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50],
55263 };
55264 let p = (a.data).as_ptr();
55265 let r = _mm512_load_epi32(black_box(p));
55266 let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
55267 assert_eq_m512i(r, e);
55268 }
55269
55270 #[simd_test(enable = "avx512f,avx512vl")]
55271 unsafe fn test_mm256_load_epi32() {
55272 #[repr(align(64))]
55273 struct Align {
55274 data: [i32; 8],
55275 }
55276 let a = Align {
55277 data: [4, 3, 2, 5, 8, 9, 64, 50],
55278 };
55279 let p = (a.data).as_ptr();
55280 let r = _mm256_load_epi32(black_box(p));
55281 let e = _mm256_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50);
55282 assert_eq_m256i(r, e);
55283 }
55284
55285 #[simd_test(enable = "avx512f,avx512vl")]
55286 unsafe fn test_mm_load_epi32() {
55287 #[repr(align(64))]
55288 struct Align {
55289 data: [i32; 4],
55290 }
55291 let a = Align { data: [4, 3, 2, 5] };
55292 let p = (a.data).as_ptr();
55293 let r = _mm_load_epi32(black_box(p));
55294 let e = _mm_setr_epi32(4, 3, 2, 5);
55295 assert_eq_m128i(r, e);
55296 }
55297
55298 #[simd_test(enable = "avx512f")]
55299 unsafe fn test_mm512_store_epi32() {
55300 let a = _mm512_set1_epi32(9);
55301 let mut r = _mm512_undefined_epi32();
55302 _mm512_store_epi32(&mut r as *mut _ as *mut i32, a);
55303 assert_eq_m512i(r, a);
55304 }
55305
55306 #[simd_test(enable = "avx512f,avx512vl")]
55307 unsafe fn test_mm256_store_epi32() {
55308 let a = _mm256_set1_epi32(9);
55309 let mut r = _mm256_undefined_si256();
55310 _mm256_store_epi32(&mut r as *mut _ as *mut i32, a);
55311 assert_eq_m256i(r, a);
55312 }
55313
55314 #[simd_test(enable = "avx512f,avx512vl")]
55315 unsafe fn test_mm_store_epi32() {
55316 let a = _mm_set1_epi32(9);
55317 let mut r = _mm_undefined_si128();
55318 _mm_store_epi32(&mut r as *mut _ as *mut i32, a);
55319 assert_eq_m128i(r, a);
55320 }
55321
55322 #[simd_test(enable = "avx512f")]
55323 unsafe fn test_mm512_load_ps() {
55324 #[repr(align(64))]
55325 struct Align {
55326 data: [f32; 16], // 64 bytes
55327 }
55328 let a = Align {
55329 data: [
55330 4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
55331 ],
55332 };
55333 let p = (a.data).as_ptr();
55334 let r = _mm512_load_ps(black_box(p));
55335 let e = _mm512_setr_ps(
55336 4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
55337 );
55338 assert_eq_m512(r, e);
55339 }
55340
55341 #[simd_test(enable = "avx512f")]
55342 unsafe fn test_mm512_store_ps() {
55343 let a = _mm512_set1_ps(9.);
55344 let mut r = _mm512_undefined_ps();
55345 _mm512_store_ps(&mut r as *mut _ as *mut f32, a);
55346 assert_eq_m512(r, a);
55347 }
55348
55349 #[simd_test(enable = "avx512f")]
55350 unsafe fn test_mm512_mask_set1_epi32() {
55351 let src = _mm512_set1_epi32(2);
55352 let a: i32 = 11;
55353 let r = _mm512_mask_set1_epi32(src, 0, a);
55354 assert_eq_m512i(r, src);
55355 let r = _mm512_mask_set1_epi32(src, 0b11111111_11111111, a);
55356 let e = _mm512_set1_epi32(11);
55357 assert_eq_m512i(r, e);
55358 }
55359
55360 #[simd_test(enable = "avx512f")]
55361 unsafe fn test_mm512_maskz_set1_epi32() {
55362 let a: i32 = 11;
55363 let r = _mm512_maskz_set1_epi32(0, a);
55364 assert_eq_m512i(r, _mm512_setzero_si512());
55365 let r = _mm512_maskz_set1_epi32(0b11111111_11111111, a);
55366 let e = _mm512_set1_epi32(11);
55367 assert_eq_m512i(r, e);
55368 }
55369
55370 #[simd_test(enable = "avx512f,avx512vl")]
55371 unsafe fn test_mm256_mask_set1_epi32() {
55372 let src = _mm256_set1_epi32(2);
55373 let a: i32 = 11;
55374 let r = _mm256_mask_set1_epi32(src, 0, a);
55375 assert_eq_m256i(r, src);
55376 let r = _mm256_mask_set1_epi32(src, 0b11111111, a);
55377 let e = _mm256_set1_epi32(11);
55378 assert_eq_m256i(r, e);
55379 }
55380
55381 #[simd_test(enable = "avx512f,avx512vl")]
55382 unsafe fn test_mm256_maskz_set1_epi32() {
55383 let a: i32 = 11;
55384 let r = _mm256_maskz_set1_epi32(0, a);
55385 assert_eq_m256i(r, _mm256_setzero_si256());
55386 let r = _mm256_maskz_set1_epi32(0b11111111, a);
55387 let e = _mm256_set1_epi32(11);
55388 assert_eq_m256i(r, e);
55389 }
55390
55391 #[simd_test(enable = "avx512f,avx512vl")]
55392 unsafe fn test_mm_mask_set1_epi32() {
55393 let src = _mm_set1_epi32(2);
55394 let a: i32 = 11;
55395 let r = _mm_mask_set1_epi32(src, 0, a);
55396 assert_eq_m128i(r, src);
55397 let r = _mm_mask_set1_epi32(src, 0b00001111, a);
55398 let e = _mm_set1_epi32(11);
55399 assert_eq_m128i(r, e);
55400 }
55401
55402 #[simd_test(enable = "avx512f,avx512vl")]
55403 unsafe fn test_mm_maskz_set1_epi32() {
55404 let a: i32 = 11;
55405 let r = _mm_maskz_set1_epi32(0, a);
55406 assert_eq_m128i(r, _mm_setzero_si128());
55407 let r = _mm_maskz_set1_epi32(0b00001111, a);
55408 let e = _mm_set1_epi32(11);
55409 assert_eq_m128i(r, e);
55410 }
55411
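// Note (added for clarity): for the masked scalar (_ss/_sd) operations below,
// only bit 0 of the mask matters. The low lane takes the computed value when
// that bit is set, otherwise it comes from `src` (or is zeroed for the maskz
// variants); the upper lanes are always copied from the first vector operand.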
55412 #[simd_test(enable = "avx512f")]
55413 unsafe fn test_mm_mask_move_ss() {
55414 let src = _mm_set_ps(10., 11., 100., 110.);
55415 let a = _mm_set_ps(1., 2., 10., 20.);
55416 let b = _mm_set_ps(3., 4., 30., 40.);
55417 let r = _mm_mask_move_ss(src, 0, a, b);
55418 let e = _mm_set_ps(1., 2., 10., 110.);
55419 assert_eq_m128(r, e);
55420 let r = _mm_mask_move_ss(src, 0b11111111, a, b);
55421 let e = _mm_set_ps(1., 2., 10., 40.);
55422 assert_eq_m128(r, e);
55423 }
55424
55425 #[simd_test(enable = "avx512f")]
55426 unsafe fn test_mm_maskz_move_ss() {
55427 let a = _mm_set_ps(1., 2., 10., 20.);
55428 let b = _mm_set_ps(3., 4., 30., 40.);
55429 let r = _mm_maskz_move_ss(0, a, b);
55430 let e = _mm_set_ps(1., 2., 10., 0.);
55431 assert_eq_m128(r, e);
55432 let r = _mm_maskz_move_ss(0b11111111, a, b);
55433 let e = _mm_set_ps(1., 2., 10., 40.);
55434 assert_eq_m128(r, e);
55435 }
55436
55437 #[simd_test(enable = "avx512f")]
55438 unsafe fn test_mm_mask_move_sd() {
55439 let src = _mm_set_pd(10., 11.);
55440 let a = _mm_set_pd(1., 2.);
55441 let b = _mm_set_pd(3., 4.);
55442 let r = _mm_mask_move_sd(src, 0, a, b);
55443 let e = _mm_set_pd(1., 11.);
55444 assert_eq_m128d(r, e);
55445 let r = _mm_mask_move_sd(src, 0b11111111, a, b);
55446 let e = _mm_set_pd(1., 4.);
55447 assert_eq_m128d(r, e);
55448 }
55449
55450 #[simd_test(enable = "avx512f")]
55451 unsafe fn test_mm_maskz_move_sd() {
55452 let a = _mm_set_pd(1., 2.);
55453 let b = _mm_set_pd(3., 4.);
55454 let r = _mm_maskz_move_sd(0, a, b);
55455 let e = _mm_set_pd(1., 0.);
55456 assert_eq_m128d(r, e);
55457 let r = _mm_maskz_move_sd(0b11111111, a, b);
55458 let e = _mm_set_pd(1., 4.);
55459 assert_eq_m128d(r, e);
55460 }
55461
55462 #[simd_test(enable = "avx512f")]
55463 unsafe fn test_mm_mask_add_ss() {
55464 let src = _mm_set_ps(10., 11., 100., 110.);
55465 let a = _mm_set_ps(1., 2., 10., 20.);
55466 let b = _mm_set_ps(3., 4., 30., 40.);
55467 let r = _mm_mask_add_ss(src, 0, a, b);
55468 let e = _mm_set_ps(1., 2., 10., 110.);
55469 assert_eq_m128(r, e);
55470 let r = _mm_mask_add_ss(src, 0b11111111, a, b);
55471 let e = _mm_set_ps(1., 2., 10., 60.);
55472 assert_eq_m128(r, e);
55473 }
55474
55475 #[simd_test(enable = "avx512f")]
55476 unsafe fn test_mm_maskz_add_ss() {
55477 let a = _mm_set_ps(1., 2., 10., 20.);
55478 let b = _mm_set_ps(3., 4., 30., 40.);
55479 let r = _mm_maskz_add_ss(0, a, b);
55480 let e = _mm_set_ps(1., 2., 10., 0.);
55481 assert_eq_m128(r, e);
55482 let r = _mm_maskz_add_ss(0b11111111, a, b);
55483 let e = _mm_set_ps(1., 2., 10., 60.);
55484 assert_eq_m128(r, e);
55485 }
55486
55487 #[simd_test(enable = "avx512f")]
55488 unsafe fn test_mm_mask_add_sd() {
55489 let src = _mm_set_pd(10., 11.);
55490 let a = _mm_set_pd(1., 2.);
55491 let b = _mm_set_pd(3., 4.);
55492 let r = _mm_mask_add_sd(src, 0, a, b);
55493 let e = _mm_set_pd(1., 11.);
55494 assert_eq_m128d(r, e);
55495 let r = _mm_mask_add_sd(src, 0b11111111, a, b);
55496 let e = _mm_set_pd(1., 6.);
55497 assert_eq_m128d(r, e);
55498 }
55499
55500 #[simd_test(enable = "avx512f")]
55501 unsafe fn test_mm_maskz_add_sd() {
55502 let a = _mm_set_pd(1., 2.);
55503 let b = _mm_set_pd(3., 4.);
55504 let r = _mm_maskz_add_sd(0, a, b);
55505 let e = _mm_set_pd(1., 0.);
55506 assert_eq_m128d(r, e);
55507 let r = _mm_maskz_add_sd(0b11111111, a, b);
55508 let e = _mm_set_pd(1., 6.);
55509 assert_eq_m128d(r, e);
55510 }
55511
55512 #[simd_test(enable = "avx512f")]
55513 unsafe fn test_mm_mask_sub_ss() {
55514 let src = _mm_set_ps(10., 11., 100., 110.);
55515 let a = _mm_set_ps(1., 2., 10., 20.);
55516 let b = _mm_set_ps(3., 4., 30., 40.);
55517 let r = _mm_mask_sub_ss(src, 0, a, b);
55518 let e = _mm_set_ps(1., 2., 10., 110.);
55519 assert_eq_m128(r, e);
55520 let r = _mm_mask_sub_ss(src, 0b11111111, a, b);
55521 let e = _mm_set_ps(1., 2., 10., -20.);
55522 assert_eq_m128(r, e);
55523 }
55524
55525 #[simd_test(enable = "avx512f")]
55526 unsafe fn test_mm_maskz_sub_ss() {
55527 let a = _mm_set_ps(1., 2., 10., 20.);
55528 let b = _mm_set_ps(3., 4., 30., 40.);
55529 let r = _mm_maskz_sub_ss(0, a, b);
55530 let e = _mm_set_ps(1., 2., 10., 0.);
55531 assert_eq_m128(r, e);
55532 let r = _mm_maskz_sub_ss(0b11111111, a, b);
55533 let e = _mm_set_ps(1., 2., 10., -20.);
55534 assert_eq_m128(r, e);
55535 }
55536
55537 #[simd_test(enable = "avx512f")]
55538 unsafe fn test_mm_mask_sub_sd() {
55539 let src = _mm_set_pd(10., 11.);
55540 let a = _mm_set_pd(1., 2.);
55541 let b = _mm_set_pd(3., 4.);
55542 let r = _mm_mask_sub_sd(src, 0, a, b);
55543 let e = _mm_set_pd(1., 11.);
55544 assert_eq_m128d(r, e);
55545 let r = _mm_mask_sub_sd(src, 0b11111111, a, b);
55546 let e = _mm_set_pd(1., -2.);
55547 assert_eq_m128d(r, e);
55548 }
55549
55550 #[simd_test(enable = "avx512f")]
55551 unsafe fn test_mm_maskz_sub_sd() {
55552 let a = _mm_set_pd(1., 2.);
55553 let b = _mm_set_pd(3., 4.);
55554 let r = _mm_maskz_sub_sd(0, a, b);
55555 let e = _mm_set_pd(1., 0.);
55556 assert_eq_m128d(r, e);
55557 let r = _mm_maskz_sub_sd(0b11111111, a, b);
55558 let e = _mm_set_pd(1., -2.);
55559 assert_eq_m128d(r, e);
55560 }
55561
55562 #[simd_test(enable = "avx512f")]
55563 unsafe fn test_mm_mask_mul_ss() {
55564 let src = _mm_set_ps(10., 11., 100., 110.);
55565 let a = _mm_set_ps(1., 2., 10., 20.);
55566 let b = _mm_set_ps(3., 4., 30., 40.);
55567 let r = _mm_mask_mul_ss(src, 0, a, b);
55568 let e = _mm_set_ps(1., 2., 10., 110.);
55569 assert_eq_m128(r, e);
55570 let r = _mm_mask_mul_ss(src, 0b11111111, a, b);
55571 let e = _mm_set_ps(1., 2., 10., 800.);
55572 assert_eq_m128(r, e);
55573 }
55574
55575 #[simd_test(enable = "avx512f")]
55576 unsafe fn test_mm_maskz_mul_ss() {
55577 let a = _mm_set_ps(1., 2., 10., 20.);
55578 let b = _mm_set_ps(3., 4., 30., 40.);
55579 let r = _mm_maskz_mul_ss(0, a, b);
55580 let e = _mm_set_ps(1., 2., 10., 0.);
55581 assert_eq_m128(r, e);
55582 let r = _mm_maskz_mul_ss(0b11111111, a, b);
55583 let e = _mm_set_ps(1., 2., 10., 800.);
55584 assert_eq_m128(r, e);
55585 }
55586
55587 #[simd_test(enable = "avx512f")]
55588 unsafe fn test_mm_mask_mul_sd() {
55589 let src = _mm_set_pd(10., 11.);
55590 let a = _mm_set_pd(1., 2.);
55591 let b = _mm_set_pd(3., 4.);
55592 let r = _mm_mask_mul_sd(src, 0, a, b);
55593 let e = _mm_set_pd(1., 11.);
55594 assert_eq_m128d(r, e);
55595 let r = _mm_mask_mul_sd(src, 0b11111111, a, b);
55596 let e = _mm_set_pd(1., 8.);
55597 assert_eq_m128d(r, e);
55598 }
55599
55600 #[simd_test(enable = "avx512f")]
55601 unsafe fn test_mm_maskz_mul_sd() {
55602 let a = _mm_set_pd(1., 2.);
55603 let b = _mm_set_pd(3., 4.);
55604 let r = _mm_maskz_mul_sd(0, a, b);
55605 let e = _mm_set_pd(1., 0.);
55606 assert_eq_m128d(r, e);
55607 let r = _mm_maskz_mul_sd(0b11111111, a, b);
55608 let e = _mm_set_pd(1., 8.);
55609 assert_eq_m128d(r, e);
55610 }
55611
55612 #[simd_test(enable = "avx512f")]
55613 unsafe fn test_mm_mask_div_ss() {
55614 let src = _mm_set_ps(10., 11., 100., 110.);
55615 let a = _mm_set_ps(1., 2., 10., 20.);
55616 let b = _mm_set_ps(3., 4., 30., 40.);
55617 let r = _mm_mask_div_ss(src, 0, a, b);
55618 let e = _mm_set_ps(1., 2., 10., 110.);
55619 assert_eq_m128(r, e);
55620 let r = _mm_mask_div_ss(src, 0b11111111, a, b);
55621 let e = _mm_set_ps(1., 2., 10., 0.5);
55622 assert_eq_m128(r, e);
55623 }
55624
55625 #[simd_test(enable = "avx512f")]
55626 unsafe fn test_mm_maskz_div_ss() {
55627 let a = _mm_set_ps(1., 2., 10., 20.);
55628 let b = _mm_set_ps(3., 4., 30., 40.);
55629 let r = _mm_maskz_div_ss(0, a, b);
55630 let e = _mm_set_ps(1., 2., 10., 0.);
55631 assert_eq_m128(r, e);
55632 let r = _mm_maskz_div_ss(0b11111111, a, b);
55633 let e = _mm_set_ps(1., 2., 10., 0.5);
55634 assert_eq_m128(r, e);
55635 }
55636
55637 #[simd_test(enable = "avx512f")]
55638 unsafe fn test_mm_mask_div_sd() {
55639 let src = _mm_set_pd(10., 11.);
55640 let a = _mm_set_pd(1., 2.);
55641 let b = _mm_set_pd(3., 4.);
55642 let r = _mm_mask_div_sd(src, 0, a, b);
55643 let e = _mm_set_pd(1., 11.);
55644 assert_eq_m128d(r, e);
55645 let r = _mm_mask_div_sd(src, 0b11111111, a, b);
55646 let e = _mm_set_pd(1., 0.5);
55647 assert_eq_m128d(r, e);
55648 }
55649
55650 #[simd_test(enable = "avx512f")]
55651 unsafe fn test_mm_maskz_div_sd() {
55652 let a = _mm_set_pd(1., 2.);
55653 let b = _mm_set_pd(3., 4.);
55654 let r = _mm_maskz_div_sd(0, a, b);
55655 let e = _mm_set_pd(1., 0.);
55656 assert_eq_m128d(r, e);
55657 let r = _mm_maskz_div_sd(0b11111111, a, b);
55658 let e = _mm_set_pd(1., 0.5);
55659 assert_eq_m128d(r, e);
55660 }
55661
55662 #[simd_test(enable = "avx512f")]
55663 unsafe fn test_mm_mask_max_ss() {
55664 let a = _mm_set_ps(0., 1., 2., 3.);
55665 let b = _mm_set_ps(4., 5., 6., 7.);
55666 let r = _mm_mask_max_ss(a, 0, a, b);
55667 let e = _mm_set_ps(0., 1., 2., 3.);
55668 assert_eq_m128(r, e);
55669 let r = _mm_mask_max_ss(a, 0b11111111, a, b);
55670 let e = _mm_set_ps(0., 1., 2., 7.);
55671 assert_eq_m128(r, e);
55672 }
55673
55674 #[simd_test(enable = "avx512f")]
55675 unsafe fn test_mm_maskz_max_ss() {
55676 let a = _mm_set_ps(0., 1., 2., 3.);
55677 let b = _mm_set_ps(4., 5., 6., 7.);
55678 let r = _mm_maskz_max_ss(0, a, b);
55679 let e = _mm_set_ps(0., 1., 2., 0.);
55680 assert_eq_m128(r, e);
55681 let r = _mm_maskz_max_ss(0b11111111, a, b);
55682 let e = _mm_set_ps(0., 1., 2., 7.);
55683 assert_eq_m128(r, e);
55684 }
55685
55686 #[simd_test(enable = "avx512f")]
55687 unsafe fn test_mm_mask_max_sd() {
55688 let a = _mm_set_pd(0., 1.);
55689 let b = _mm_set_pd(2., 3.);
55690 let r = _mm_mask_max_sd(a, 0, a, b);
55691 let e = _mm_set_pd(0., 1.);
55692 assert_eq_m128d(r, e);
55693 let r = _mm_mask_max_sd(a, 0b11111111, a, b);
55694 let e = _mm_set_pd(0., 3.);
55695 assert_eq_m128d(r, e);
55696 }
55697
55698 #[simd_test(enable = "avx512f")]
55699 unsafe fn test_mm_maskz_max_sd() {
55700 let a = _mm_set_pd(0., 1.);
55701 let b = _mm_set_pd(2., 3.);
55702 let r = _mm_maskz_max_sd(0, a, b);
55703 let e = _mm_set_pd(0., 0.);
55704 assert_eq_m128d(r, e);
55705 let r = _mm_maskz_max_sd(0b11111111, a, b);
55706 let e = _mm_set_pd(0., 3.);
55707 assert_eq_m128d(r, e);
55708 }
55709
55710 #[simd_test(enable = "avx512f")]
55711 unsafe fn test_mm_mask_min_ss() {
55712 let a = _mm_set_ps(0., 1., 2., 3.);
55713 let b = _mm_set_ps(4., 5., 6., 7.);
55714 let r = _mm_mask_min_ss(a, 0, a, b);
55715 let e = _mm_set_ps(0., 1., 2., 3.);
55716 assert_eq_m128(r, e);
55717 let r = _mm_mask_min_ss(a, 0b11111111, a, b);
55718 let e = _mm_set_ps(0., 1., 2., 3.);
55719 assert_eq_m128(r, e);
55720 }
55721
55722 #[simd_test(enable = "avx512f")]
55723 unsafe fn test_mm_maskz_min_ss() {
55724 let a = _mm_set_ps(0., 1., 2., 3.);
55725 let b = _mm_set_ps(4., 5., 6., 7.);
55726 let r = _mm_maskz_min_ss(0, a, b);
55727 let e = _mm_set_ps(0., 1., 2., 0.);
55728 assert_eq_m128(r, e);
55729 let r = _mm_maskz_min_ss(0b11111111, a, b);
55730 let e = _mm_set_ps(0., 1., 2., 3.);
55731 assert_eq_m128(r, e);
55732 }
55733
55734 #[simd_test(enable = "avx512f")]
55735 unsafe fn test_mm_mask_min_sd() {
55736 let a = _mm_set_pd(0., 1.);
55737 let b = _mm_set_pd(2., 3.);
55738 let r = _mm_mask_min_sd(a, 0, a, b);
55739 let e = _mm_set_pd(0., 1.);
55740 assert_eq_m128d(r, e);
55741 let r = _mm_mask_min_sd(a, 0b11111111, a, b);
55742 let e = _mm_set_pd(0., 1.);
55743 assert_eq_m128d(r, e);
55744 }
55745
55746 #[simd_test(enable = "avx512f")]
55747 unsafe fn test_mm_maskz_min_sd() {
55748 let a = _mm_set_pd(0., 1.);
55749 let b = _mm_set_pd(2., 3.);
55750 let r = _mm_maskz_min_sd(0, a, b);
55751 let e = _mm_set_pd(0., 0.);
55752 assert_eq_m128d(r, e);
55753 let r = _mm_maskz_min_sd(0b11111111, a, b);
55754 let e = _mm_set_pd(0., 1.);
55755 assert_eq_m128d(r, e);
55756 }
55757
55758 #[simd_test(enable = "avx512f")]
55759 unsafe fn test_mm_mask_sqrt_ss() {
55760 let src = _mm_set_ps(10., 11., 100., 110.);
55761 let a = _mm_set_ps(1., 2., 10., 20.);
55762 let b = _mm_set_ps(3., 4., 30., 4.);
55763 let r = _mm_mask_sqrt_ss(src, 0, a, b);
55764 let e = _mm_set_ps(1., 2., 10., 110.);
55765 assert_eq_m128(r, e);
55766 let r = _mm_mask_sqrt_ss(src, 0b11111111, a, b);
55767 let e = _mm_set_ps(1., 2., 10., 2.);
55768 assert_eq_m128(r, e);
55769 }
55770
55771 #[simd_test(enable = "avx512f")]
55772 unsafe fn test_mm_maskz_sqrt_ss() {
55773 let a = _mm_set_ps(1., 2., 10., 20.);
55774 let b = _mm_set_ps(3., 4., 30., 4.);
55775 let r = _mm_maskz_sqrt_ss(0, a, b);
55776 let e = _mm_set_ps(1., 2., 10., 0.);
55777 assert_eq_m128(r, e);
55778 let r = _mm_maskz_sqrt_ss(0b11111111, a, b);
55779 let e = _mm_set_ps(1., 2., 10., 2.);
55780 assert_eq_m128(r, e);
55781 }
55782
55783 #[simd_test(enable = "avx512f")]
55784 unsafe fn test_mm_mask_sqrt_sd() {
55785 let src = _mm_set_pd(10., 11.);
55786 let a = _mm_set_pd(1., 2.);
55787 let b = _mm_set_pd(3., 4.);
55788 let r = _mm_mask_sqrt_sd(src, 0, a, b);
55789 let e = _mm_set_pd(1., 11.);
55790 assert_eq_m128d(r, e);
55791 let r = _mm_mask_sqrt_sd(src, 0b11111111, a, b);
55792 let e = _mm_set_pd(1., 2.);
55793 assert_eq_m128d(r, e);
55794 }
55795
55796 #[simd_test(enable = "avx512f")]
55797 unsafe fn test_mm_maskz_sqrt_sd() {
55798 let a = _mm_set_pd(1., 2.);
55799 let b = _mm_set_pd(3., 4.);
55800 let r = _mm_maskz_sqrt_sd(0, a, b);
55801 let e = _mm_set_pd(1., 0.);
55802 assert_eq_m128d(r, e);
55803 let r = _mm_maskz_sqrt_sd(0b11111111, a, b);
55804 let e = _mm_set_pd(1., 2.);
55805 assert_eq_m128d(r, e);
55806 }
55807
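// Note (added for clarity): rsqrt14 computes an approximate 1.0 / sqrt(x)
// with a relative error of at most 2^-14. These tests use 4.0 as the low
// element, for which the result is exactly 0.5, so exact equality is asserted.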
55808 #[simd_test(enable = "avx512f")]
55809 unsafe fn test_mm_rsqrt14_ss() {
55810 let a = _mm_set_ps(1., 2., 10., 20.);
55811 let b = _mm_set_ps(3., 4., 30., 4.);
55812 let r = _mm_rsqrt14_ss(a, b);
55813 let e = _mm_set_ps(1., 2., 10., 0.5);
55814 assert_eq_m128(r, e);
55815 }
55816
55817 #[simd_test(enable = "avx512f")]
55818 unsafe fn test_mm_mask_rsqrt14_ss() {
55819 let src = _mm_set_ps(10., 11., 100., 110.);
55820 let a = _mm_set_ps(1., 2., 10., 20.);
55821 let b = _mm_set_ps(3., 4., 30., 4.);
55822 let r = _mm_mask_rsqrt14_ss(src, 0, a, b);
55823 let e = _mm_set_ps(1., 2., 10., 110.);
55824 assert_eq_m128(r, e);
55825 let r = _mm_mask_rsqrt14_ss(src, 0b11111111, a, b);
55826 let e = _mm_set_ps(1., 2., 10., 0.5);
55827 assert_eq_m128(r, e);
55828 }
55829
55830 #[simd_test(enable = "avx512f")]
55831 unsafe fn test_mm_maskz_rsqrt14_ss() {
55832 let a = _mm_set_ps(1., 2., 10., 20.);
55833 let b = _mm_set_ps(3., 4., 30., 4.);
55834 let r = _mm_maskz_rsqrt14_ss(0, a, b);
55835 let e = _mm_set_ps(1., 2., 10., 0.);
55836 assert_eq_m128(r, e);
55837 let r = _mm_maskz_rsqrt14_ss(0b11111111, a, b);
55838 let e = _mm_set_ps(1., 2., 10., 0.5);
55839 assert_eq_m128(r, e);
55840 }
55841
55842 #[simd_test(enable = "avx512f")]
55843 unsafe fn test_mm_rsqrt14_sd() {
55844 let a = _mm_set_pd(1., 2.);
55845 let b = _mm_set_pd(3., 4.);
55846 let r = _mm_rsqrt14_sd(a, b);
55847 let e = _mm_set_pd(1., 0.5);
55848 assert_eq_m128d(r, e);
55849 }
55850
55851 #[simd_test(enable = "avx512f")]
55852 unsafe fn test_mm_mask_rsqrt14_sd() {
55853 let src = _mm_set_pd(10., 11.);
55854 let a = _mm_set_pd(1., 2.);
55855 let b = _mm_set_pd(3., 4.);
55856 let r = _mm_mask_rsqrt14_sd(src, 0, a, b);
55857 let e = _mm_set_pd(1., 11.);
55858 assert_eq_m128d(r, e);
55859 let r = _mm_mask_rsqrt14_sd(src, 0b11111111, a, b);
55860 let e = _mm_set_pd(1., 0.5);
55861 assert_eq_m128d(r, e);
55862 }
55863
55864 #[simd_test(enable = "avx512f")]
55865 unsafe fn test_mm_maskz_rsqrt14_sd() {
55866 let a = _mm_set_pd(1., 2.);
55867 let b = _mm_set_pd(3., 4.);
55868 let r = _mm_maskz_rsqrt14_sd(0, a, b);
55869 let e = _mm_set_pd(1., 0.);
55870 assert_eq_m128d(r, e);
55871 let r = _mm_maskz_rsqrt14_sd(0b11111111, a, b);
55872 let e = _mm_set_pd(1., 0.5);
55873 assert_eq_m128d(r, e);
55874 }
55875
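// Note (added for clarity): rcp14 computes an approximate 1.0 / x with a
// relative error of at most 2^-14; 1.0 / 4.0 is exactly representable as
// 0.25, so exact equality holds here.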
55876 #[simd_test(enable = "avx512f")]
55877 unsafe fn test_mm_rcp14_ss() {
55878 let a = _mm_set_ps(1., 2., 10., 20.);
55879 let b = _mm_set_ps(3., 4., 30., 4.);
55880 let r = _mm_rcp14_ss(a, b);
55881 let e = _mm_set_ps(1., 2., 10., 0.25);
55882 assert_eq_m128(r, e);
55883 }
55884
55885 #[simd_test(enable = "avx512f")]
55886 unsafe fn test_mm_mask_rcp14_ss() {
55887 let src = _mm_set_ps(10., 11., 100., 110.);
55888 let a = _mm_set_ps(1., 2., 10., 20.);
55889 let b = _mm_set_ps(3., 4., 30., 4.);
55890 let r = _mm_mask_rcp14_ss(src, 0, a, b);
55891 let e = _mm_set_ps(1., 2., 10., 110.);
55892 assert_eq_m128(r, e);
55893 let r = _mm_mask_rcp14_ss(src, 0b11111111, a, b);
55894 let e = _mm_set_ps(1., 2., 10., 0.25);
55895 assert_eq_m128(r, e);
55896 }
55897
55898 #[simd_test(enable = "avx512f")]
55899 unsafe fn test_mm_maskz_rcp14_ss() {
55900 let a = _mm_set_ps(1., 2., 10., 20.);
55901 let b = _mm_set_ps(3., 4., 30., 4.);
55902 let r = _mm_maskz_rcp14_ss(0, a, b);
55903 let e = _mm_set_ps(1., 2., 10., 0.);
55904 assert_eq_m128(r, e);
55905 let r = _mm_maskz_rcp14_ss(0b11111111, a, b);
55906 let e = _mm_set_ps(1., 2., 10., 0.25);
55907 assert_eq_m128(r, e);
55908 }
55909
55910 #[simd_test(enable = "avx512f")]
55911 unsafe fn test_mm_rcp14_sd() {
55912 let a = _mm_set_pd(1., 2.);
55913 let b = _mm_set_pd(3., 4.);
55914 let r = _mm_rcp14_sd(a, b);
55915 let e = _mm_set_pd(1., 0.25);
55916 assert_eq_m128d(r, e);
55917 }
55918
55919 #[simd_test(enable = "avx512f")]
55920 unsafe fn test_mm_mask_rcp14_sd() {
55921 let src = _mm_set_pd(10., 11.);
55922 let a = _mm_set_pd(1., 2.);
55923 let b = _mm_set_pd(3., 4.);
55924 let r = _mm_mask_rcp14_sd(src, 0, a, b);
55925 let e = _mm_set_pd(1., 11.);
55926 assert_eq_m128d(r, e);
55927 let r = _mm_mask_rcp14_sd(src, 0b11111111, a, b);
55928 let e = _mm_set_pd(1., 0.25);
55929 assert_eq_m128d(r, e);
55930 }
55931
55932 #[simd_test(enable = "avx512f")]
55933 unsafe fn test_mm_maskz_rcp14_sd() {
55934 let a = _mm_set_pd(1., 2.);
55935 let b = _mm_set_pd(3., 4.);
55936 let r = _mm_maskz_rcp14_sd(0, a, b);
55937 let e = _mm_set_pd(1., 0.);
55938 assert_eq_m128d(r, e);
55939 let r = _mm_maskz_rcp14_sd(0b11111111, a, b);
55940 let e = _mm_set_pd(1., 0.25);
55941 assert_eq_m128d(r, e);
55942 }
55943
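// Note (added for clarity): getexp returns floor(log2(|x|)) of the source
// element as a float, so getexp(3.0) == 1.0 while the upper lanes are copied
// from `a` (all 2.0).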
55944 #[simd_test(enable = "avx512f")]
55945 unsafe fn test_mm_getexp_ss() {
55946 let a = _mm_set1_ps(2.);
55947 let b = _mm_set1_ps(3.);
55948 let r = _mm_getexp_ss(a, b);
55949 let e = _mm_set_ps(2., 2., 2., 1.);
55950 assert_eq_m128(r, e);
55951 }
55952
55953 #[simd_test(enable = "avx512f")]
55954 unsafe fn test_mm_mask_getexp_ss() {
55955 let a = _mm_set1_ps(2.);
55956 let b = _mm_set1_ps(3.);
55957 let r = _mm_mask_getexp_ss(a, 0, a, b);
55958 let e = _mm_set_ps(2., 2., 2., 2.);
55959 assert_eq_m128(r, e);
55960 let r = _mm_mask_getexp_ss(a, 0b11111111, a, b);
55961 let e = _mm_set_ps(2., 2., 2., 1.);
55962 assert_eq_m128(r, e);
55963 }
55964
55965 #[simd_test(enable = "avx512f")]
55966 unsafe fn test_mm_maskz_getexp_ss() {
55967 let a = _mm_set1_ps(2.);
55968 let b = _mm_set1_ps(3.);
55969 let r = _mm_maskz_getexp_ss(0, a, b);
55970 let e = _mm_set_ps(2., 2., 2., 0.);
55971 assert_eq_m128(r, e);
55972 let r = _mm_maskz_getexp_ss(0b11111111, a, b);
55973 let e = _mm_set_ps(2., 2., 2., 1.);
55974 assert_eq_m128(r, e);
55975 }
55976
55977 #[simd_test(enable = "avx512f")]
55978 unsafe fn test_mm_getexp_sd() {
55979 let a = _mm_set1_pd(2.);
55980 let b = _mm_set1_pd(3.);
55981 let r = _mm_getexp_sd(a, b);
55982 let e = _mm_set_pd(2., 1.);
55983 assert_eq_m128d(r, e);
55984 }
55985
55986 #[simd_test(enable = "avx512f")]
55987 unsafe fn test_mm_mask_getexp_sd() {
55988 let a = _mm_set1_pd(2.);
55989 let b = _mm_set1_pd(3.);
55990 let r = _mm_mask_getexp_sd(a, 0, a, b);
55991 let e = _mm_set_pd(2., 2.);
55992 assert_eq_m128d(r, e);
55993 let r = _mm_mask_getexp_sd(a, 0b11111111, a, b);
55994 let e = _mm_set_pd(2., 1.);
55995 assert_eq_m128d(r, e);
55996 }
55997
55998 #[simd_test(enable = "avx512f")]
55999 unsafe fn test_mm_maskz_getexp_sd() {
56000 let a = _mm_set1_pd(2.);
56001 let b = _mm_set1_pd(3.);
56002 let r = _mm_maskz_getexp_sd(0, a, b);
56003 let e = _mm_set_pd(2., 0.);
56004 assert_eq_m128d(r, e);
56005 let r = _mm_maskz_getexp_sd(0b11111111, a, b);
56006 let e = _mm_set_pd(2., 1.);
56007 assert_eq_m128d(r, e);
56008 }
56009
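// Note (added for clarity): getmant extracts the mantissa normalized to the
// interval selected by the first const generic; with _MM_MANT_NORM_1_2,
// 10.0 == 1.25 * 2^3 yields 1.25, and _MM_MANT_SIGN_SRC keeps the sign of the
// source.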
56010 #[simd_test(enable = "avx512f")]
56011 unsafe fn test_mm_getmant_ss() {
56012 let a = _mm_set1_ps(20.);
56013 let b = _mm_set1_ps(10.);
56014 let r = _mm_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, b);
56015 let e = _mm_set_ps(20., 20., 20., 1.25);
56016 assert_eq_m128(r, e);
56017 }
56018
56019 #[simd_test(enable = "avx512f")]
56020 unsafe fn test_mm_mask_getmant_ss() {
56021 let a = _mm_set1_ps(20.);
56022 let b = _mm_set1_ps(10.);
56023 let r = _mm_mask_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a, b);
56024 let e = _mm_set_ps(20., 20., 20., 20.);
56025 assert_eq_m128(r, e);
56026 let r = _mm_mask_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b11111111, a, b);
56027 let e = _mm_set_ps(20., 20., 20., 1.25);
56028 assert_eq_m128(r, e);
56029 }
56030
56031 #[simd_test(enable = "avx512f")]
56032 unsafe fn test_mm_maskz_getmant_ss() {
56033 let a = _mm_set1_ps(20.);
56034 let b = _mm_set1_ps(10.);
56035 let r = _mm_maskz_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a, b);
56036 let e = _mm_set_ps(20., 20., 20., 0.);
56037 assert_eq_m128(r, e);
56038 let r = _mm_maskz_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b11111111, a, b);
56039 let e = _mm_set_ps(20., 20., 20., 1.25);
56040 assert_eq_m128(r, e);
56041 }
56042
56043 #[simd_test(enable = "avx512f")]
56044 unsafe fn test_mm_getmant_sd() {
56045 let a = _mm_set1_pd(20.);
56046 let b = _mm_set1_pd(10.);
56047 let r = _mm_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, b);
56048 let e = _mm_set_pd(20., 1.25);
56049 assert_eq_m128d(r, e);
56050 }
56051
56052 #[simd_test(enable = "avx512f")]
56053 unsafe fn test_mm_mask_getmant_sd() {
56054 let a = _mm_set1_pd(20.);
56055 let b = _mm_set1_pd(10.);
56056 let r = _mm_mask_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a, b);
56057 let e = _mm_set_pd(20., 20.);
56058 assert_eq_m128d(r, e);
56059 let r = _mm_mask_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b11111111, a, b);
56060 let e = _mm_set_pd(20., 1.25);
56061 assert_eq_m128d(r, e);
56062 }
56063
56064 #[simd_test(enable = "avx512f")]
56065 unsafe fn test_mm_maskz_getmant_sd() {
56066 let a = _mm_set1_pd(20.);
56067 let b = _mm_set1_pd(10.);
56068 let r = _mm_maskz_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a, b);
56069 let e = _mm_set_pd(20., 0.);
56070 assert_eq_m128d(r, e);
56071 let r = _mm_maskz_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b11111111, a, b);
56072 let e = _mm_set_pd(20., 1.25);
56073 assert_eq_m128d(r, e);
56074 }
56075
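// Note (added for clarity): roundscale with IMM8 == 0 rounds to the nearest
// integer at 2^0 granularity, so the low element 1.1 rounds to 1.0 while the
// upper lanes keep 2.2.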
56076 #[simd_test(enable = "avx512f")]
56077 unsafe fn test_mm_roundscale_ss() {
56078 let a = _mm_set1_ps(2.2);
56079 let b = _mm_set1_ps(1.1);
56080 let r = _mm_roundscale_ss::<0>(a, b);
56081 let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
56082 assert_eq_m128(r, e);
56083 }
56084
56085 #[simd_test(enable = "avx512f")]
56086 unsafe fn test_mm_mask_roundscale_ss() {
56087 let a = _mm_set1_ps(2.2);
56088 let b = _mm_set1_ps(1.1);
56089 let r = _mm_mask_roundscale_ss::<0>(a, 0, a, b);
56090 let e = _mm_set_ps(2.2, 2.2, 2.2, 2.2);
56091 assert_eq_m128(r, e);
56092 let r = _mm_mask_roundscale_ss::<0>(a, 0b11111111, a, b);
56093 let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
56094 assert_eq_m128(r, e);
56095 }
56096
56097 #[simd_test(enable = "avx512f")]
56098 unsafe fn test_mm_maskz_roundscale_ss() {
56099 let a = _mm_set1_ps(2.2);
56100 let b = _mm_set1_ps(1.1);
56101 let r = _mm_maskz_roundscale_ss::<0>(0, a, b);
56102 let e = _mm_set_ps(2.2, 2.2, 2.2, 0.0);
56103 assert_eq_m128(r, e);
56104 let r = _mm_maskz_roundscale_ss::<0>(0b11111111, a, b);
56105 let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
56106 assert_eq_m128(r, e);
56107 }
56108
56109 #[simd_test(enable = "avx512f")]
56110 unsafe fn test_mm_roundscale_sd() {
56111 let a = _mm_set1_pd(2.2);
56112 let b = _mm_set1_pd(1.1);
56113 let r = _mm_roundscale_sd::<0>(a, b);
56114 let e = _mm_set_pd(2.2, 1.0);
56115 assert_eq_m128d(r, e);
56116 }
56117
56118 #[simd_test(enable = "avx512f")]
56119 unsafe fn test_mm_mask_roundscale_sd() {
56120 let a = _mm_set1_pd(2.2);
56121 let b = _mm_set1_pd(1.1);
56122 let r = _mm_mask_roundscale_sd::<0>(a, 0, a, b);
56123 let e = _mm_set_pd(2.2, 2.2);
56124 assert_eq_m128d(r, e);
56125 let r = _mm_mask_roundscale_sd::<0>(a, 0b11111111, a, b);
56126 let e = _mm_set_pd(2.2, 1.0);
56127 assert_eq_m128d(r, e);
56128 }
56129
56130 #[simd_test(enable = "avx512f")]
56131 unsafe fn test_mm_maskz_roundscale_sd() {
56132 let a = _mm_set1_pd(2.2);
56133 let b = _mm_set1_pd(1.1);
56134 let r = _mm_maskz_roundscale_sd::<0>(0, a, b);
56135 let e = _mm_set_pd(2.2, 0.0);
56136 assert_eq_m128d(r, e);
56137 let r = _mm_maskz_roundscale_sd::<0>(0b11111111, a, b);
56138 let e = _mm_set_pd(2.2, 1.0);
56139 assert_eq_m128d(r, e);
56140 }
56141
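// Note (added for clarity): scalef computes a * 2^floor(b) on the low
// element, so 1.0 * 2^3 == 8.0 here.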
56142 #[simd_test(enable = "avx512f")]
56143 unsafe fn test_mm_scalef_ss() {
56144 let a = _mm_set1_ps(1.);
56145 let b = _mm_set1_ps(3.);
56146 let r = _mm_scalef_ss(a, b);
56147 let e = _mm_set_ps(1., 1., 1., 8.);
56148 assert_eq_m128(r, e);
56149 }
56150
56151 #[simd_test(enable = "avx512f")]
56152 unsafe fn test_mm_mask_scalef_ss() {
56153 let a = _mm_set1_ps(1.);
56154 let b = _mm_set1_ps(3.);
56155 let r = _mm_mask_scalef_ss(a, 0, a, b);
56156 let e = _mm_set_ps(1., 1., 1., 1.);
56157 assert_eq_m128(r, e);
56158 let r = _mm_mask_scalef_ss(a, 0b11111111, a, b);
56159 let e = _mm_set_ps(1., 1., 1., 8.);
56160 assert_eq_m128(r, e);
56161 }
56162
56163 #[simd_test(enable = "avx512f")]
56164 unsafe fn test_mm_maskz_scalef_ss() {
56165 let a = _mm_set1_ps(1.);
56166 let b = _mm_set1_ps(3.);
56167 let r = _mm_maskz_scalef_ss(0, a, b);
56168 let e = _mm_set_ps(1., 1., 1., 0.);
56169 assert_eq_m128(r, e);
56170 let r = _mm_maskz_scalef_ss(0b11111111, a, b);
56171 let e = _mm_set_ps(1., 1., 1., 8.);
56172 assert_eq_m128(r, e);
56173 }
56174
56175 #[simd_test(enable = "avx512f")]
56176 unsafe fn test_mm_scalef_sd() {
56177 let a = _mm_set1_pd(1.);
56178 let b = _mm_set1_pd(3.);
56179 let r = _mm_scalef_sd(a, b);
56180 let e = _mm_set_pd(1., 8.);
56181 assert_eq_m128d(r, e);
56182 }
56183
56184 #[simd_test(enable = "avx512f")]
56185 unsafe fn test_mm_mask_scalef_sd() {
56186 let a = _mm_set1_pd(1.);
56187 let b = _mm_set1_pd(3.);
56188 let r = _mm_mask_scalef_sd(a, 0, a, b);
56189 let e = _mm_set_pd(1., 1.);
56190 assert_eq_m128d(r, e);
56191 let r = _mm_mask_scalef_sd(a, 0b11111111, a, b);
56192 let e = _mm_set_pd(1., 8.);
56193 assert_eq_m128d(r, e);
56194 }
56195
56196 #[simd_test(enable = "avx512f")]
56197 unsafe fn test_mm_maskz_scalef_sd() {
56198 let a = _mm_set1_pd(1.);
56199 let b = _mm_set1_pd(3.);
56200 let r = _mm_maskz_scalef_sd(0, a, b);
56201 let e = _mm_set_pd(1., 0.);
56202 assert_eq_m128d(r, e);
56203 let r = _mm_maskz_scalef_sd(0b11111111, a, b);
56204 let e = _mm_set_pd(1., 8.);
56205 assert_eq_m128d(r, e);
56206 }
56207
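// Note (added for clarity): the fused multiply-add tests cover all masking
// flavors: `mask_` takes the masked-off low lane from `a`, `maskz_` zeroes it,
// and `mask3_` takes it from `c`. The expected low values follow from
// fmadd = a*b + c, fmsub = a*b - c, fnmadd = -(a*b) + c and fnmsub = -(a*b) - c.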
56208 #[simd_test(enable = "avx512f")]
56209 unsafe fn test_mm_mask_fmadd_ss() {
56210 let a = _mm_set1_ps(1.);
56211 let b = _mm_set1_ps(2.);
56212 let c = _mm_set1_ps(3.);
56213 let r = _mm_mask_fmadd_ss(a, 0, b, c);
56214 assert_eq_m128(r, a);
56215 let r = _mm_mask_fmadd_ss(a, 0b11111111, b, c);
56216 let e = _mm_set_ps(1., 1., 1., 5.);
56217 assert_eq_m128(r, e);
56218 }
56219
56220 #[simd_test(enable = "avx512f")]
56221 unsafe fn test_mm_maskz_fmadd_ss() {
56222 let a = _mm_set1_ps(1.);
56223 let b = _mm_set1_ps(2.);
56224 let c = _mm_set1_ps(3.);
56225 let r = _mm_maskz_fmadd_ss(0, a, b, c);
56226 let e = _mm_set_ps(1., 1., 1., 0.);
56227 assert_eq_m128(r, e);
56228 let r = _mm_maskz_fmadd_ss(0b11111111, a, b, c);
56229 let e = _mm_set_ps(1., 1., 1., 5.);
56230 assert_eq_m128(r, e);
56231 }
56232
56233 #[simd_test(enable = "avx512f")]
56234 unsafe fn test_mm_mask3_fmadd_ss() {
56235 let a = _mm_set1_ps(1.);
56236 let b = _mm_set1_ps(2.);
56237 let c = _mm_set1_ps(3.);
56238 let r = _mm_mask3_fmadd_ss(a, b, c, 0);
56239 assert_eq_m128(r, c);
56240 let r = _mm_mask3_fmadd_ss(a, b, c, 0b11111111);
56241 let e = _mm_set_ps(3., 3., 3., 5.);
56242 assert_eq_m128(r, e);
56243 }
56244
56245 #[simd_test(enable = "avx512f")]
56246 unsafe fn test_mm_mask_fmadd_sd() {
56247 let a = _mm_set1_pd(1.);
56248 let b = _mm_set1_pd(2.);
56249 let c = _mm_set1_pd(3.);
56250 let r = _mm_mask_fmadd_sd(a, 0, b, c);
56251 assert_eq_m128d(r, a);
56252 let r = _mm_mask_fmadd_sd(a, 0b11111111, b, c);
56253 let e = _mm_set_pd(1., 5.);
56254 assert_eq_m128d(r, e);
56255 }
56256
56257 #[simd_test(enable = "avx512f")]
56258 unsafe fn test_mm_maskz_fmadd_sd() {
56259 let a = _mm_set1_pd(1.);
56260 let b = _mm_set1_pd(2.);
56261 let c = _mm_set1_pd(3.);
56262 let r = _mm_maskz_fmadd_sd(0, a, b, c);
56263 let e = _mm_set_pd(1., 0.);
56264 assert_eq_m128d(r, e);
56265 let r = _mm_maskz_fmadd_sd(0b11111111, a, b, c);
56266 let e = _mm_set_pd(1., 5.);
56267 assert_eq_m128d(r, e);
56268 }
56269
56270 #[simd_test(enable = "avx512f")]
56271 unsafe fn test_mm_mask3_fmadd_sd() {
56272 let a = _mm_set1_pd(1.);
56273 let b = _mm_set1_pd(2.);
56274 let c = _mm_set1_pd(3.);
56275 let r = _mm_mask3_fmadd_sd(a, b, c, 0);
56276 assert_eq_m128d(r, c);
56277 let r = _mm_mask3_fmadd_sd(a, b, c, 0b11111111);
56278 let e = _mm_set_pd(3., 5.);
56279 assert_eq_m128d(r, e);
56280 }
56281
56282 #[simd_test(enable = "avx512f")]
56283 unsafe fn test_mm_mask_fmsub_ss() {
56284 let a = _mm_set1_ps(1.);
56285 let b = _mm_set1_ps(2.);
56286 let c = _mm_set1_ps(3.);
56287 let r = _mm_mask_fmsub_ss(a, 0, b, c);
56288 assert_eq_m128(r, a);
56289 let r = _mm_mask_fmsub_ss(a, 0b11111111, b, c);
56290 let e = _mm_set_ps(1., 1., 1., -1.);
56291 assert_eq_m128(r, e);
56292 }
56293
56294 #[simd_test(enable = "avx512f")]
56295 unsafe fn test_mm_maskz_fmsub_ss() {
56296 let a = _mm_set1_ps(1.);
56297 let b = _mm_set1_ps(2.);
56298 let c = _mm_set1_ps(3.);
56299 let r = _mm_maskz_fmsub_ss(0, a, b, c);
56300 let e = _mm_set_ps(1., 1., 1., 0.);
56301 assert_eq_m128(r, e);
56302 let r = _mm_maskz_fmsub_ss(0b11111111, a, b, c);
56303 let e = _mm_set_ps(1., 1., 1., -1.);
56304 assert_eq_m128(r, e);
56305 }
56306
56307 #[simd_test(enable = "avx512f")]
56308 unsafe fn test_mm_mask3_fmsub_ss() {
56309 let a = _mm_set1_ps(1.);
56310 let b = _mm_set1_ps(2.);
56311 let c = _mm_set1_ps(3.);
56312 let r = _mm_mask3_fmsub_ss(a, b, c, 0);
56313 assert_eq_m128(r, c);
56314 let r = _mm_mask3_fmsub_ss(a, b, c, 0b11111111);
56315 let e = _mm_set_ps(3., 3., 3., -1.);
56316 assert_eq_m128(r, e);
56317 }
56318
56319 #[simd_test(enable = "avx512f")]
56320 unsafe fn test_mm_mask_fmsub_sd() {
56321 let a = _mm_set1_pd(1.);
56322 let b = _mm_set1_pd(2.);
56323 let c = _mm_set1_pd(3.);
56324 let r = _mm_mask_fmsub_sd(a, 0, b, c);
56325 assert_eq_m128d(r, a);
56326 let r = _mm_mask_fmsub_sd(a, 0b11111111, b, c);
56327 let e = _mm_set_pd(1., -1.);
56328 assert_eq_m128d(r, e);
56329 }
56330
56331 #[simd_test(enable = "avx512f")]
56332 unsafe fn test_mm_maskz_fmsub_sd() {
56333 let a = _mm_set1_pd(1.);
56334 let b = _mm_set1_pd(2.);
56335 let c = _mm_set1_pd(3.);
56336 let r = _mm_maskz_fmsub_sd(0, a, b, c);
56337 let e = _mm_set_pd(1., 0.);
56338 assert_eq_m128d(r, e);
56339 let r = _mm_maskz_fmsub_sd(0b11111111, a, b, c);
56340 let e = _mm_set_pd(1., -1.);
56341 assert_eq_m128d(r, e);
56342 }
56343
56344 #[simd_test(enable = "avx512f")]
56345 unsafe fn test_mm_mask3_fmsub_sd() {
56346 let a = _mm_set1_pd(1.);
56347 let b = _mm_set1_pd(2.);
56348 let c = _mm_set1_pd(3.);
56349 let r = _mm_mask3_fmsub_sd(a, b, c, 0);
56350 assert_eq_m128d(r, c);
56351 let r = _mm_mask3_fmsub_sd(a, b, c, 0b11111111);
56352 let e = _mm_set_pd(3., -1.);
56353 assert_eq_m128d(r, e);
56354 }
56355
56356 #[simd_test(enable = "avx512f")]
56357 unsafe fn test_mm_mask_fnmadd_ss() {
56358 let a = _mm_set1_ps(1.);
56359 let b = _mm_set1_ps(2.);
56360 let c = _mm_set1_ps(3.);
56361 let r = _mm_mask_fnmadd_ss(a, 0, b, c);
56362 assert_eq_m128(r, a);
56363 let r = _mm_mask_fnmadd_ss(a, 0b11111111, b, c);
56364 let e = _mm_set_ps(1., 1., 1., 1.);
56365 assert_eq_m128(r, e);
56366 }
56367
56368 #[simd_test(enable = "avx512f")]
56369 unsafe fn test_mm_maskz_fnmadd_ss() {
56370 let a = _mm_set1_ps(1.);
56371 let b = _mm_set1_ps(2.);
56372 let c = _mm_set1_ps(3.);
56373 let r = _mm_maskz_fnmadd_ss(0, a, b, c);
56374 let e = _mm_set_ps(1., 1., 1., 0.);
56375 assert_eq_m128(r, e);
56376 let r = _mm_maskz_fnmadd_ss(0b11111111, a, b, c);
56377 let e = _mm_set_ps(1., 1., 1., 1.);
56378 assert_eq_m128(r, e);
56379 }
56380
56381 #[simd_test(enable = "avx512f")]
56382 unsafe fn test_mm_mask3_fnmadd_ss() {
56383 let a = _mm_set1_ps(1.);
56384 let b = _mm_set1_ps(2.);
56385 let c = _mm_set1_ps(3.);
56386 let r = _mm_mask3_fnmadd_ss(a, b, c, 0);
56387 assert_eq_m128(r, c);
56388 let r = _mm_mask3_fnmadd_ss(a, b, c, 0b11111111);
56389 let e = _mm_set_ps(3., 3., 3., 1.);
56390 assert_eq_m128(r, e);
56391 }
56392
56393 #[simd_test(enable = "avx512f")]
56394 unsafe fn test_mm_mask_fnmadd_sd() {
56395 let a = _mm_set1_pd(1.);
56396 let b = _mm_set1_pd(2.);
56397 let c = _mm_set1_pd(3.);
56398 let r = _mm_mask_fnmadd_sd(a, 0, b, c);
56399 assert_eq_m128d(r, a);
56400 let r = _mm_mask_fnmadd_sd(a, 0b11111111, b, c);
56401 let e = _mm_set_pd(1., 1.);
56402 assert_eq_m128d(r, e);
56403 }
56404
56405 #[simd_test(enable = "avx512f")]
56406 unsafe fn test_mm_maskz_fnmadd_sd() {
56407 let a = _mm_set1_pd(1.);
56408 let b = _mm_set1_pd(2.);
56409 let c = _mm_set1_pd(3.);
56410 let r = _mm_maskz_fnmadd_sd(0, a, b, c);
56411 let e = _mm_set_pd(1., 0.);
56412 assert_eq_m128d(r, e);
56413 let r = _mm_maskz_fnmadd_sd(0b11111111, a, b, c);
56414 let e = _mm_set_pd(1., 1.);
56415 assert_eq_m128d(r, e);
56416 }
56417
56418 #[simd_test(enable = "avx512f")]
56419 unsafe fn test_mm_mask3_fnmadd_sd() {
56420 let a = _mm_set1_pd(1.);
56421 let b = _mm_set1_pd(2.);
56422 let c = _mm_set1_pd(3.);
56423 let r = _mm_mask3_fnmadd_sd(a, b, c, 0);
56424 assert_eq_m128d(r, c);
56425 let r = _mm_mask3_fnmadd_sd(a, b, c, 0b11111111);
56426 let e = _mm_set_pd(3., 1.);
56427 assert_eq_m128d(r, e);
56428 }
56429
56430 #[simd_test(enable = "avx512f")]
56431 unsafe fn test_mm_mask_fnmsub_ss() {
56432 let a = _mm_set1_ps(1.);
56433 let b = _mm_set1_ps(2.);
56434 let c = _mm_set1_ps(3.);
56435 let r = _mm_mask_fnmsub_ss(a, 0, b, c);
56436 assert_eq_m128(r, a);
56437 let r = _mm_mask_fnmsub_ss(a, 0b11111111, b, c);
56438 let e = _mm_set_ps(1., 1., 1., -5.);
56439 assert_eq_m128(r, e);
56440 }
56441
56442 #[simd_test(enable = "avx512f")]
56443 unsafe fn test_mm_maskz_fnmsub_ss() {
56444 let a = _mm_set1_ps(1.);
56445 let b = _mm_set1_ps(2.);
56446 let c = _mm_set1_ps(3.);
56447 let r = _mm_maskz_fnmsub_ss(0, a, b, c);
56448 let e = _mm_set_ps(1., 1., 1., 0.);
56449 assert_eq_m128(r, e);
56450 let r = _mm_maskz_fnmsub_ss(0b11111111, a, b, c);
56451 let e = _mm_set_ps(1., 1., 1., -5.);
56452 assert_eq_m128(r, e);
56453 }
56454
56455 #[simd_test(enable = "avx512f")]
56456 unsafe fn test_mm_mask3_fnmsub_ss() {
56457 let a = _mm_set1_ps(1.);
56458 let b = _mm_set1_ps(2.);
56459 let c = _mm_set1_ps(3.);
56460 let r = _mm_mask3_fnmsub_ss(a, b, c, 0);
56461 assert_eq_m128(r, c);
56462 let r = _mm_mask3_fnmsub_ss(a, b, c, 0b11111111);
56463 let e = _mm_set_ps(3., 3., 3., -5.);
56464 assert_eq_m128(r, e);
56465 }
56466
56467 #[simd_test(enable = "avx512f")]
56468 unsafe fn test_mm_mask_fnmsub_sd() {
56469 let a = _mm_set1_pd(1.);
56470 let b = _mm_set1_pd(2.);
56471 let c = _mm_set1_pd(3.);
56472 let r = _mm_mask_fnmsub_sd(a, 0, b, c);
56473 assert_eq_m128d(r, a);
56474 let r = _mm_mask_fnmsub_sd(a, 0b11111111, b, c);
56475 let e = _mm_set_pd(1., -5.);
56476 assert_eq_m128d(r, e);
56477 }
56478
56479 #[simd_test(enable = "avx512f")]
56480 unsafe fn test_mm_maskz_fnmsub_sd() {
56481 let a = _mm_set1_pd(1.);
56482 let b = _mm_set1_pd(2.);
56483 let c = _mm_set1_pd(3.);
56484 let r = _mm_maskz_fnmsub_sd(0, a, b, c);
56485 let e = _mm_set_pd(1., 0.);
56486 assert_eq_m128d(r, e);
56487 let r = _mm_maskz_fnmsub_sd(0b11111111, a, b, c);
56488 let e = _mm_set_pd(1., -5.);
56489 assert_eq_m128d(r, e);
56490 }
56491
56492 #[simd_test(enable = "avx512f")]
56493 unsafe fn test_mm_mask3_fnmsub_sd() {
56494 let a = _mm_set1_pd(1.);
56495 let b = _mm_set1_pd(2.);
56496 let c = _mm_set1_pd(3.);
56497 let r = _mm_mask3_fnmsub_sd(a, b, c, 0);
56498 assert_eq_m128d(r, c);
56499 let r = _mm_mask3_fnmsub_sd(a, b, c, 0b11111111);
56500 let e = _mm_set_pd(3., -5.);
56501 assert_eq_m128d(r, e);
56502 }
56503
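// Note (added for clarity): the *_round_* tests pass
// _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC, i.e. truncation with floating-point
// exceptions suppressed; the operands used here produce exact results, so the
// rounding mode does not change the expected values.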
56504 #[simd_test(enable = "avx512f")]
56505 unsafe fn test_mm_add_round_ss() {
56506 let a = _mm_set_ps(1., 2., 10., 20.);
56507 let b = _mm_set_ps(3., 4., 30., 40.);
56508 let r = _mm_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
56509 let e = _mm_set_ps(1., 2., 10., 60.);
56510 assert_eq_m128(r, e);
56511 }
56512
56513 #[simd_test(enable = "avx512f")]
56514 unsafe fn test_mm_mask_add_round_ss() {
56515 let src = _mm_set_ps(10., 11., 100., 110.);
56516 let a = _mm_set_ps(1., 2., 10., 20.);
56517 let b = _mm_set_ps(3., 4., 30., 40.);
56518 let r = _mm_mask_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
56519 let e = _mm_set_ps(1., 2., 10., 110.);
56520 assert_eq_m128(r, e);
56521 let r = _mm_mask_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
56522 src, 0b11111111, a, b,
56523 );
56524 let e = _mm_set_ps(1., 2., 10., 60.);
56525 assert_eq_m128(r, e);
56526 }
56527
56528 #[simd_test(enable = "avx512f")]
56529 unsafe fn test_mm_maskz_add_round_ss() {
56530 let a = _mm_set_ps(1., 2., 10., 20.);
56531 let b = _mm_set_ps(3., 4., 30., 40.);
56532 let r = _mm_maskz_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
56533 let e = _mm_set_ps(1., 2., 10., 0.);
56534 assert_eq_m128(r, e);
56535 let r =
56536 _mm_maskz_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
56537 let e = _mm_set_ps(1., 2., 10., 60.);
56538 assert_eq_m128(r, e);
56539 }
56540
56541 #[simd_test(enable = "avx512f")]
56542 unsafe fn test_mm_add_round_sd() {
56543 let a = _mm_set_pd(1., 2.);
56544 let b = _mm_set_pd(3., 4.);
56545 let r = _mm_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
56546 let e = _mm_set_pd(1., 6.);
56547 assert_eq_m128d(r, e);
56548 }
56549
56550 #[simd_test(enable = "avx512f")]
56551 unsafe fn test_mm_mask_add_round_sd() {
56552 let src = _mm_set_pd(10., 11.);
56553 let a = _mm_set_pd(1., 2.);
56554 let b = _mm_set_pd(3., 4.);
56555 let r = _mm_mask_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
56556 let e = _mm_set_pd(1., 11.);
56557 assert_eq_m128d(r, e);
56558 let r = _mm_mask_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
56559 src, 0b11111111, a, b,
56560 );
56561 let e = _mm_set_pd(1., 6.);
56562 assert_eq_m128d(r, e);
56563 }
56564
56565 #[simd_test(enable = "avx512f")]
56566 unsafe fn test_mm_maskz_add_round_sd() {
56567 let a = _mm_set_pd(1., 2.);
56568 let b = _mm_set_pd(3., 4.);
56569 let r = _mm_maskz_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
56570 let e = _mm_set_pd(1., 0.);
56571 assert_eq_m128d(r, e);
56572 let r =
56573 _mm_maskz_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
56574 let e = _mm_set_pd(1., 6.);
56575 assert_eq_m128d(r, e);
56576 }
56577
56578 #[simd_test(enable = "avx512f")]
56579 unsafe fn test_mm_sub_round_ss() {
56580 let a = _mm_set_ps(1., 2., 10., 20.);
56581 let b = _mm_set_ps(3., 4., 30., 40.);
56582 let r = _mm_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
56583 let e = _mm_set_ps(1., 2., 10., -20.);
56584 assert_eq_m128(r, e);
56585 }
56586
56587 #[simd_test(enable = "avx512f")]
56588 unsafe fn test_mm_mask_sub_round_ss() {
56589 let src = _mm_set_ps(10., 11., 100., 110.);
56590 let a = _mm_set_ps(1., 2., 10., 20.);
56591 let b = _mm_set_ps(3., 4., 30., 40.);
56592 let r = _mm_mask_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
56593 let e = _mm_set_ps(1., 2., 10., 110.);
56594 assert_eq_m128(r, e);
56595 let r = _mm_mask_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
56596 src, 0b11111111, a, b,
56597 );
56598 let e = _mm_set_ps(1., 2., 10., -20.);
56599 assert_eq_m128(r, e);
56600 }
56601
56602 #[simd_test(enable = "avx512f")]
56603 unsafe fn test_mm_maskz_sub_round_ss() {
56604 let a = _mm_set_ps(1., 2., 10., 20.);
56605 let b = _mm_set_ps(3., 4., 30., 40.);
56606 let r = _mm_maskz_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
56607 let e = _mm_set_ps(1., 2., 10., 0.);
56608 assert_eq_m128(r, e);
56609 let r =
56610 _mm_maskz_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
56611 let e = _mm_set_ps(1., 2., 10., -20.);
56612 assert_eq_m128(r, e);
56613 }
56614
56615 #[simd_test(enable = "avx512f")]
56616 unsafe fn test_mm_sub_round_sd() {
56617 let a = _mm_set_pd(1., 2.);
56618 let b = _mm_set_pd(3., 4.);
56619 let r = _mm_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
56620 let e = _mm_set_pd(1., -2.);
56621 assert_eq_m128d(r, e);
56622 }
56623
56624 #[simd_test(enable = "avx512f")]
56625 unsafe fn test_mm_mask_sub_round_sd() {
56626 let src = _mm_set_pd(10., 11.);
56627 let a = _mm_set_pd(1., 2.);
56628 let b = _mm_set_pd(3., 4.);
56629 let r = _mm_mask_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
56630 let e = _mm_set_pd(1., 11.);
56631 assert_eq_m128d(r, e);
56632 let r = _mm_mask_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
56633 src, 0b11111111, a, b,
56634 );
56635 let e = _mm_set_pd(1., -2.);
56636 assert_eq_m128d(r, e);
56637 }
56638
56639 #[simd_test(enable = "avx512f")]
56640 unsafe fn test_mm_maskz_sub_round_sd() {
56641 let a = _mm_set_pd(1., 2.);
56642 let b = _mm_set_pd(3., 4.);
56643 let r = _mm_maskz_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
56644 let e = _mm_set_pd(1., 0.);
56645 assert_eq_m128d(r, e);
56646 let r =
56647 _mm_maskz_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
56648 let e = _mm_set_pd(1., -2.);
56649 assert_eq_m128d(r, e);
56650 }
56651
56652 #[simd_test(enable = "avx512f")]
56653 unsafe fn test_mm_mul_round_ss() {
56654 let a = _mm_set_ps(1., 2., 10., 20.);
56655 let b = _mm_set_ps(3., 4., 30., 40.);
56656 let r = _mm_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
56657 let e = _mm_set_ps(1., 2., 10., 800.);
56658 assert_eq_m128(r, e);
56659 }
56660
56661 #[simd_test(enable = "avx512f")]
56662 unsafe fn test_mm_mask_mul_round_ss() {
56663 let src = _mm_set_ps(10., 11., 100., 110.);
56664 let a = _mm_set_ps(1., 2., 10., 20.);
56665 let b = _mm_set_ps(3., 4., 30., 40.);
56666 let r = _mm_mask_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
56667 let e = _mm_set_ps(1., 2., 10., 110.);
56668 assert_eq_m128(r, e);
56669 let r = _mm_mask_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
56670 src, 0b11111111, a, b,
56671 );
56672 let e = _mm_set_ps(1., 2., 10., 800.);
56673 assert_eq_m128(r, e);
56674 }
56675
56676 #[simd_test(enable = "avx512f")]
56677 unsafe fn test_mm_maskz_mul_round_ss() {
56678 let a = _mm_set_ps(1., 2., 10., 20.);
56679 let b = _mm_set_ps(3., 4., 30., 40.);
56680 let r = _mm_maskz_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
56681 let e = _mm_set_ps(1., 2., 10., 0.);
56682 assert_eq_m128(r, e);
56683 let r =
56684 _mm_maskz_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
56685 let e = _mm_set_ps(1., 2., 10., 800.);
56686 assert_eq_m128(r, e);
56687 }
56688
56689 #[simd_test(enable = "avx512f")]
56690 unsafe fn test_mm_mul_round_sd() {
56691 let a = _mm_set_pd(1., 2.);
56692 let b = _mm_set_pd(3., 4.);
56693 let r = _mm_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
56694 let e = _mm_set_pd(1., 8.);
56695 assert_eq_m128d(r, e);
56696 }
56697
56698 #[simd_test(enable = "avx512f")]
56699 unsafe fn test_mm_mask_mul_round_sd() {
56700 let src = _mm_set_pd(10., 11.);
56701 let a = _mm_set_pd(1., 2.);
56702 let b = _mm_set_pd(3., 4.);
56703 let r = _mm_mask_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
56704 let e = _mm_set_pd(1., 11.);
56705 assert_eq_m128d(r, e);
56706 let r = _mm_mask_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
56707 src, 0b11111111, a, b,
56708 );
56709 let e = _mm_set_pd(1., 8.);
56710 assert_eq_m128d(r, e);
56711 }
56712
56713 #[simd_test(enable = "avx512f")]
56714 unsafe fn test_mm_maskz_mul_round_sd() {
56715 let a = _mm_set_pd(1., 2.);
56716 let b = _mm_set_pd(3., 4.);
56717 let r = _mm_maskz_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
56718 let e = _mm_set_pd(1., 0.);
56719 assert_eq_m128d(r, e);
56720 let r =
56721 _mm_maskz_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
56722 let e = _mm_set_pd(1., 8.);
56723 assert_eq_m128d(r, e);
56724 }
56725
56726 #[simd_test(enable = "avx512f")]
56727 unsafe fn test_mm_div_round_ss() {
56728 let a = _mm_set_ps(1., 2., 10., 20.);
56729 let b = _mm_set_ps(3., 4., 30., 40.);
56730 let r = _mm_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
56731 let e = _mm_set_ps(1., 2., 10., 0.5);
56732 assert_eq_m128(r, e);
56733 }
56734
56735 #[simd_test(enable = "avx512f")]
56736 unsafe fn test_mm_mask_div_round_ss() {
56737 let src = _mm_set_ps(10., 11., 100., 110.);
56738 let a = _mm_set_ps(1., 2., 10., 20.);
56739 let b = _mm_set_ps(3., 4., 30., 40.);
56740 let r = _mm_mask_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
56741 let e = _mm_set_ps(1., 2., 10., 110.);
56742 assert_eq_m128(r, e);
56743 let r = _mm_mask_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
56744 src, 0b11111111, a, b,
56745 );
56746 let e = _mm_set_ps(1., 2., 10., 0.5);
56747 assert_eq_m128(r, e);
56748 }
56749
56750 #[simd_test(enable = "avx512f")]
56751 unsafe fn test_mm_maskz_div_round_ss() {
56752 let a = _mm_set_ps(1., 2., 10., 20.);
56753 let b = _mm_set_ps(3., 4., 30., 40.);
56754 let r = _mm_maskz_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
56755 let e = _mm_set_ps(1., 2., 10., 0.);
56756 assert_eq_m128(r, e);
56757 let r =
56758 _mm_maskz_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
56759 let e = _mm_set_ps(1., 2., 10., 0.5);
56760 assert_eq_m128(r, e);
56761 }
56762
56763 #[simd_test(enable = "avx512f")]
56764 unsafe fn test_mm_div_round_sd() {
56765 let a = _mm_set_pd(1., 2.);
56766 let b = _mm_set_pd(3., 4.);
56767 let r = _mm_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
56768 let e = _mm_set_pd(1., 0.5);
56769 assert_eq_m128d(r, e);
56770 }
56771
56772 #[simd_test(enable = "avx512f")]
56773 unsafe fn test_mm_mask_div_round_sd() {
56774 let src = _mm_set_pd(10., 11.);
56775 let a = _mm_set_pd(1., 2.);
56776 let b = _mm_set_pd(3., 4.);
56777 let r = _mm_mask_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
56778 let e = _mm_set_pd(1., 11.);
56779 assert_eq_m128d(r, e);
56780 let r = _mm_mask_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
56781 src, 0b11111111, a, b,
56782 );
56783 let e = _mm_set_pd(1., 0.5);
56784 assert_eq_m128d(r, e);
56785 }
56786
56787 #[simd_test(enable = "avx512f")]
56788 unsafe fn test_mm_maskz_div_round_sd() {
56789 let a = _mm_set_pd(1., 2.);
56790 let b = _mm_set_pd(3., 4.);
56791 let r = _mm_maskz_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
56792 let e = _mm_set_pd(1., 0.);
56793 assert_eq_m128d(r, e);
56794 let r =
56795 _mm_maskz_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
56796 let e = _mm_set_pd(1., 0.5);
56797 assert_eq_m128d(r, e);
56798 }
56799
56800 #[simd_test(enable = "avx512f")]
56801 unsafe fn test_mm_max_round_ss() {
56802 let a = _mm_set_ps(0., 1., 2., 3.);
56803 let b = _mm_set_ps(4., 5., 6., 7.);
56804 let r = _mm_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b);
56805 let e = _mm_set_ps(0., 1., 2., 7.);
56806 assert_eq_m128(r, e);
56807 }
56808
56809 #[simd_test(enable = "avx512f")]
56810 unsafe fn test_mm_mask_max_round_ss() {
56811 let a = _mm_set_ps(0., 1., 2., 3.);
56812 let b = _mm_set_ps(4., 5., 6., 7.);
56813 let r = _mm_mask_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
56814 let e = _mm_set_ps(0., 1., 2., 3.);
56815 assert_eq_m128(r, e);
56816 let r = _mm_mask_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
56817 let e = _mm_set_ps(0., 1., 2., 7.);
56818 assert_eq_m128(r, e);
56819 }
56820
56821 #[simd_test(enable = "avx512f")]
56822 unsafe fn test_mm_maskz_max_round_ss() {
56823 let a = _mm_set_ps(0., 1., 2., 3.);
56824 let b = _mm_set_ps(4., 5., 6., 7.);
56825 let r = _mm_maskz_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
56826 let e = _mm_set_ps(0., 1., 2., 0.);
56827 assert_eq_m128(r, e);
56828 let r = _mm_maskz_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
56829 let e = _mm_set_ps(0., 1., 2., 7.);
56830 assert_eq_m128(r, e);
56831 }
56832
56833 #[simd_test(enable = "avx512f")]
56834 unsafe fn test_mm_max_round_sd() {
56835 let a = _mm_set_pd(0., 1.);
56836 let b = _mm_set_pd(2., 3.);
56837 let r = _mm_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
56838 let e = _mm_set_pd(0., 3.);
56839 assert_eq_m128d(r, e);
56840 }
56841
56842 #[simd_test(enable = "avx512f")]
56843 unsafe fn test_mm_mask_max_round_sd() {
56844 let a = _mm_set_pd(0., 1.);
56845 let b = _mm_set_pd(2., 3.);
56846 let r = _mm_mask_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
56847 let e = _mm_set_pd(0., 1.);
56848 assert_eq_m128d(r, e);
56849 let r = _mm_mask_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
56850 let e = _mm_set_pd(0., 3.);
56851 assert_eq_m128d(r, e);
56852 }
56853
56854 #[simd_test(enable = "avx512f")]
56855 unsafe fn test_mm_maskz_max_round_sd() {
56856 let a = _mm_set_pd(0., 1.);
56857 let b = _mm_set_pd(2., 3.);
56858 let r = _mm_maskz_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
56859 let e = _mm_set_pd(0., 0.);
56860 assert_eq_m128d(r, e);
56861 let r = _mm_maskz_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
56862 let e = _mm_set_pd(0., 3.);
56863 assert_eq_m128d(r, e);
56864 }
56865
56866 #[simd_test(enable = "avx512f")]
56867 unsafe fn test_mm_min_round_ss() {
56868 let a = _mm_set_ps(0., 1., 2., 3.);
56869 let b = _mm_set_ps(4., 5., 6., 7.);
56870 let r = _mm_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b);
56871 let e = _mm_set_ps(0., 1., 2., 3.);
56872 assert_eq_m128(r, e);
56873 }
56874
56875 #[simd_test(enable = "avx512f")]
56876 unsafe fn test_mm_mask_min_round_ss() {
56877 let a = _mm_set_ps(0., 1., 2., 3.);
56878 let b = _mm_set_ps(4., 5., 6., 7.);
56879 let r = _mm_mask_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
56880 let e = _mm_set_ps(0., 1., 2., 3.);
56881 assert_eq_m128(r, e);
56882 let r = _mm_mask_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
56883 let e = _mm_set_ps(0., 1., 2., 3.);
56884 assert_eq_m128(r, e);
56885 }
56886
56887 #[simd_test(enable = "avx512f")]
56888 unsafe fn test_mm_maskz_min_round_ss() {
56889 let a = _mm_set_ps(0., 1., 2., 3.);
56890 let b = _mm_set_ps(4., 5., 6., 7.);
56891 let r = _mm_maskz_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
56892 let e = _mm_set_ps(0., 1., 2., 0.);
56893 assert_eq_m128(r, e);
56894 let r = _mm_maskz_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
56895 let e = _mm_set_ps(0., 1., 2., 3.);
56896 assert_eq_m128(r, e);
56897 }
56898
56899 #[simd_test(enable = "avx512f")]
56900 unsafe fn test_mm_min_round_sd() {
56901 let a = _mm_set_pd(0., 1.);
56902 let b = _mm_set_pd(2., 3.);
56903 let r = _mm_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
56904 let e = _mm_set_pd(0., 1.);
56905 assert_eq_m128d(r, e);
56906 }
56907
56908 #[simd_test(enable = "avx512f")]
56909 unsafe fn test_mm_mask_min_round_sd() {
56910 let a = _mm_set_pd(0., 1.);
56911 let b = _mm_set_pd(2., 3.);
56912 let r = _mm_mask_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
56913 let e = _mm_set_pd(0., 1.);
56914 assert_eq_m128d(r, e);
56915 let r = _mm_mask_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
56916 let e = _mm_set_pd(0., 1.);
56917 assert_eq_m128d(r, e);
56918 }
56919
56920 #[simd_test(enable = "avx512f")]
56921 unsafe fn test_mm_maskz_min_round_sd() {
56922 let a = _mm_set_pd(0., 1.);
56923 let b = _mm_set_pd(2., 3.);
56924 let r = _mm_maskz_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
56925 let e = _mm_set_pd(0., 0.);
56926 assert_eq_m128d(r, e);
56927 let r = _mm_maskz_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
56928 let e = _mm_set_pd(0., 1.);
56929 assert_eq_m128d(r, e);
56930 }
56931
56932 #[simd_test(enable = "avx512f")]
56933 unsafe fn test_mm_sqrt_round_ss() {
56934 let a = _mm_set_ps(1., 2., 10., 20.);
56935 let b = _mm_set_ps(3., 4., 30., 4.);
56936 let r = _mm_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
56937 let e = _mm_set_ps(1., 2., 10., 2.);
56938 assert_eq_m128(r, e);
56939 }
56940
56941 #[simd_test(enable = "avx512f")]
56942 unsafe fn test_mm_mask_sqrt_round_ss() {
56943 let src = _mm_set_ps(10., 11., 100., 110.);
56944 let a = _mm_set_ps(1., 2., 10., 20.);
56945 let b = _mm_set_ps(3., 4., 30., 4.);
56946 let r = _mm_mask_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
56947 let e = _mm_set_ps(1., 2., 10., 110.);
56948 assert_eq_m128(r, e);
56949 let r = _mm_mask_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
56950 src, 0b11111111, a, b,
56951 );
56952 let e = _mm_set_ps(1., 2., 10., 2.);
56953 assert_eq_m128(r, e);
56954 }
56955
56956 #[simd_test(enable = "avx512f")]
56957 unsafe fn test_mm_maskz_sqrt_round_ss() {
56958 let a = _mm_set_ps(1., 2., 10., 20.);
56959 let b = _mm_set_ps(3., 4., 30., 4.);
56960 let r = _mm_maskz_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
56961 let e = _mm_set_ps(1., 2., 10., 0.);
56962 assert_eq_m128(r, e);
56963 let r =
56964 _mm_maskz_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
56965 let e = _mm_set_ps(1., 2., 10., 2.);
56966 assert_eq_m128(r, e);
56967 }
56968
56969 #[simd_test(enable = "avx512f")]
56970 unsafe fn test_mm_sqrt_round_sd() {
56971 let a = _mm_set_pd(1., 2.);
56972 let b = _mm_set_pd(3., 4.);
56973 let r = _mm_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
56974 let e = _mm_set_pd(1., 2.);
56975 assert_eq_m128d(r, e);
56976 }
56977
56978 #[simd_test(enable = "avx512f")]
56979 unsafe fn test_mm_mask_sqrt_round_sd() {
56980 let src = _mm_set_pd(10., 11.);
56981 let a = _mm_set_pd(1., 2.);
56982 let b = _mm_set_pd(3., 4.);
56983 let r = _mm_mask_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
56984 let e = _mm_set_pd(1., 11.);
56985 assert_eq_m128d(r, e);
56986 let r = _mm_mask_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
56987 src, 0b11111111, a, b,
56988 );
56989 let e = _mm_set_pd(1., 2.);
56990 assert_eq_m128d(r, e);
56991 }
56992
56993 #[simd_test(enable = "avx512f")]
56994 unsafe fn test_mm_maskz_sqrt_round_sd() {
56995 let a = _mm_set_pd(1., 2.);
56996 let b = _mm_set_pd(3., 4.);
56997 let r = _mm_maskz_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
56998 let e = _mm_set_pd(1., 0.);
56999 assert_eq_m128d(r, e);
57000 let r =
57001 _mm_maskz_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
57002 let e = _mm_set_pd(1., 2.);
57003 assert_eq_m128d(r, e);
57004 }
57005
57006 #[simd_test(enable = "avx512f")]
57007 unsafe fn test_mm_getexp_round_ss() {
57008 let a = _mm_set1_ps(2.);
57009 let b = _mm_set1_ps(3.);
57010 let r = _mm_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b);
57011 let e = _mm_set_ps(2., 2., 2., 1.);
57012 assert_eq_m128(r, e);
57013 }
57014
57015 #[simd_test(enable = "avx512f")]
57016 unsafe fn test_mm_mask_getexp_round_ss() {
57017 let a = _mm_set1_ps(2.);
57018 let b = _mm_set1_ps(3.);
57019 let r = _mm_mask_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
57020 let e = _mm_set_ps(2., 2., 2., 2.);
57021 assert_eq_m128(r, e);
57022 let r = _mm_mask_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
57023 let e = _mm_set_ps(2., 2., 2., 1.);
57024 assert_eq_m128(r, e);
57025 }
57026
57027 #[simd_test(enable = "avx512f")]
57028 unsafe fn test_mm_maskz_getexp_round_ss() {
57029 let a = _mm_set1_ps(2.);
57030 let b = _mm_set1_ps(3.);
57031 let r = _mm_maskz_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
57032 let e = _mm_set_ps(2., 2., 2., 0.);
57033 assert_eq_m128(r, e);
57034 let r = _mm_maskz_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
57035 let e = _mm_set_ps(2., 2., 2., 1.);
57036 assert_eq_m128(r, e);
57037 }
57038
57039 #[simd_test(enable = "avx512f")]
57040 unsafe fn test_mm_getexp_round_sd() {
57041 let a = _mm_set1_pd(2.);
57042 let b = _mm_set1_pd(3.);
57043 let r = _mm_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
57044 let e = _mm_set_pd(2., 1.);
57045 assert_eq_m128d(r, e);
57046 }
57047
57048 #[simd_test(enable = "avx512f")]
57049 unsafe fn test_mm_mask_getexp_round_sd() {
57050 let a = _mm_set1_pd(2.);
57051 let b = _mm_set1_pd(3.);
57052 let r = _mm_mask_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
57053 let e = _mm_set_pd(2., 2.);
57054 assert_eq_m128d(r, e);
57055 let r = _mm_mask_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
57056 let e = _mm_set_pd(2., 1.);
57057 assert_eq_m128d(r, e);
57058 }
57059
57060 #[simd_test(enable = "avx512f")]
57061 unsafe fn test_mm_maskz_getexp_round_sd() {
57062 let a = _mm_set1_pd(2.);
57063 let b = _mm_set1_pd(3.);
57064 let r = _mm_maskz_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
57065 let e = _mm_set_pd(2., 0.);
57066 assert_eq_m128d(r, e);
57067 let r = _mm_maskz_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
57068 let e = _mm_set_pd(2., 1.);
57069 assert_eq_m128d(r, e);
57070 }
57071
57072 #[simd_test(enable = "avx512f")]
57073 unsafe fn test_mm_getmant_round_ss() {
57074 let a = _mm_set1_ps(20.);
57075 let b = _mm_set1_ps(10.);
57076 let r =
57077 _mm_getmant_round_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC, _MM_FROUND_CUR_DIRECTION>(
57078 a, b,
57079 );
57080 let e = _mm_set_ps(20., 20., 20., 1.25);
57081 assert_eq_m128(r, e);
57082 }
57083
57084 #[simd_test(enable = "avx512f")]
57085 unsafe fn test_mm_mask_getmant_round_ss() {
57086 let a = _mm_set1_ps(20.);
57087 let b = _mm_set1_ps(10.);
57088 let r = _mm_mask_getmant_round_ss::<
57089 _MM_MANT_NORM_1_2,
57090 _MM_MANT_SIGN_SRC,
57091 _MM_FROUND_CUR_DIRECTION,
57092 >(a, 0, a, b);
57093 let e = _mm_set_ps(20., 20., 20., 20.);
57094 assert_eq_m128(r, e);
57095 let r = _mm_mask_getmant_round_ss::<
57096 _MM_MANT_NORM_1_2,
57097 _MM_MANT_SIGN_SRC,
57098 _MM_FROUND_CUR_DIRECTION,
57099 >(a, 0b11111111, a, b);
57100 let e = _mm_set_ps(20., 20., 20., 1.25);
57101 assert_eq_m128(r, e);
57102 }
57103
57104 #[simd_test(enable = "avx512f")]
57105 unsafe fn test_mm_maskz_getmant_round_ss() {
57106 let a = _mm_set1_ps(20.);
57107 let b = _mm_set1_ps(10.);
57108 let r = _mm_maskz_getmant_round_ss::<
57109 _MM_MANT_NORM_1_2,
57110 _MM_MANT_SIGN_SRC,
57111 _MM_FROUND_CUR_DIRECTION,
57112 >(0, a, b);
57113 let e = _mm_set_ps(20., 20., 20., 0.);
57114 assert_eq_m128(r, e);
57115 let r = _mm_maskz_getmant_round_ss::<
57116 _MM_MANT_NORM_1_2,
57117 _MM_MANT_SIGN_SRC,
57118 _MM_FROUND_CUR_DIRECTION,
57119 >(0b11111111, a, b);
57120 let e = _mm_set_ps(20., 20., 20., 1.25);
57121 assert_eq_m128(r, e);
57122 }
57123
57124 #[simd_test(enable = "avx512f")]
57125 unsafe fn test_mm_getmant_round_sd() {
57126 let a = _mm_set1_pd(20.);
57127 let b = _mm_set1_pd(10.);
57128 let r =
57129 _mm_getmant_round_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC, _MM_FROUND_CUR_DIRECTION>(
57130 a, b,
57131 );
57132 let e = _mm_set_pd(20., 1.25);
57133 assert_eq_m128d(r, e);
57134 }
57135
57136 #[simd_test(enable = "avx512f")]
57137 unsafe fn test_mm_mask_getmant_round_sd() {
57138 let a = _mm_set1_pd(20.);
57139 let b = _mm_set1_pd(10.);
57140 let r = _mm_mask_getmant_round_sd::<
57141 _MM_MANT_NORM_1_2,
57142 _MM_MANT_SIGN_SRC,
57143 _MM_FROUND_CUR_DIRECTION,
57144 >(a, 0, a, b);
57145 let e = _mm_set_pd(20., 20.);
57146 assert_eq_m128d(r, e);
57147 let r = _mm_mask_getmant_round_sd::<
57148 _MM_MANT_NORM_1_2,
57149 _MM_MANT_SIGN_SRC,
57150 _MM_FROUND_CUR_DIRECTION,
57151 >(a, 0b11111111, a, b);
57152 let e = _mm_set_pd(20., 1.25);
57153 assert_eq_m128d(r, e);
57154 }
57155
57156 #[simd_test(enable = "avx512f")]
57157 unsafe fn test_mm_maskz_getmant_round_sd() {
57158 let a = _mm_set1_pd(20.);
57159 let b = _mm_set1_pd(10.);
57160 let r = _mm_maskz_getmant_round_sd::<
57161 _MM_MANT_NORM_1_2,
57162 _MM_MANT_SIGN_SRC,
57163 _MM_FROUND_CUR_DIRECTION,
57164 >(0, a, b);
57165 let e = _mm_set_pd(20., 0.);
57166 assert_eq_m128d(r, e);
57167 let r = _mm_maskz_getmant_round_sd::<
57168 _MM_MANT_NORM_1_2,
57169 _MM_MANT_SIGN_SRC,
57170 _MM_FROUND_CUR_DIRECTION,
57171 >(0b11111111, a, b);
57172 let e = _mm_set_pd(20., 1.25);
57173 assert_eq_m128d(r, e);
57174 }
57175
57176 #[simd_test(enable = "avx512f")]
57177 unsafe fn test_mm_roundscale_round_ss() {
57178 let a = _mm_set1_ps(2.2);
57179 let b = _mm_set1_ps(1.1);
57180 let r = _mm_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(a, b);
57181 let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
57182 assert_eq_m128(r, e);
57183 }
57184
57185 #[simd_test(enable = "avx512f")]
57186 unsafe fn test_mm_mask_roundscale_round_ss() {
57187 let a = _mm_set1_ps(2.2);
57188 let b = _mm_set1_ps(1.1);
57189 let r = _mm_mask_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
57190 let e = _mm_set_ps(2.2, 2.2, 2.2, 2.2);
57191 assert_eq_m128(r, e);
57192 let r = _mm_mask_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
57193 let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
57194 assert_eq_m128(r, e);
57195 }
57196
57197 #[simd_test(enable = "avx512f")]
57198 unsafe fn test_mm_maskz_roundscale_round_ss() {
57199 let a = _mm_set1_ps(2.2);
57200 let b = _mm_set1_ps(1.1);
57201 let r = _mm_maskz_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(0, a, b);
57202 let e = _mm_set_ps(2.2, 2.2, 2.2, 0.0);
57203 assert_eq_m128(r, e);
57204 let r = _mm_maskz_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
57205 let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
57206 assert_eq_m128(r, e);
57207 }
57208
57209 #[simd_test(enable = "avx512f")]
57210 unsafe fn test_mm_roundscale_round_sd() {
57211 let a = _mm_set1_pd(2.2);
57212 let b = _mm_set1_pd(1.1);
57213 let r = _mm_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(a, b);
57214 let e = _mm_set_pd(2.2, 1.0);
57215 assert_eq_m128d(r, e);
57216 }
57217
57218 #[simd_test(enable = "avx512f")]
57219 unsafe fn test_mm_mask_roundscale_round_sd() {
57220 let a = _mm_set1_pd(2.2);
57221 let b = _mm_set1_pd(1.1);
57222 let r = _mm_mask_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
57223 let e = _mm_set_pd(2.2, 2.2);
57224 assert_eq_m128d(r, e);
57225 let r = _mm_mask_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
57226 let e = _mm_set_pd(2.2, 1.0);
57227 assert_eq_m128d(r, e);
57228 }
57229
57230 #[simd_test(enable = "avx512f")]
57231 unsafe fn test_mm_maskz_roundscale_round_sd() {
57232 let a = _mm_set1_pd(2.2);
57233 let b = _mm_set1_pd(1.1);
57234 let r = _mm_maskz_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(0, a, b);
57235 let e = _mm_set_pd(2.2, 0.0);
57236 assert_eq_m128d(r, e);
57237 let r = _mm_maskz_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
57238 let e = _mm_set_pd(2.2, 1.0);
57239 assert_eq_m128d(r, e);
57240 }
57241
57242 #[simd_test(enable = "avx512f")]
57243 unsafe fn test_mm_scalef_round_ss() {
57244 let a = _mm_set1_ps(1.);
57245 let b = _mm_set1_ps(3.);
57246 let r = _mm_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
57247 let e = _mm_set_ps(1., 1., 1., 8.);
57248 assert_eq_m128(r, e);
57249 }
57250
57251 #[simd_test(enable = "avx512f")]
57252 unsafe fn test_mm_mask_scalef_round_ss() {
57253 let a = _mm_set1_ps(1.);
57254 let b = _mm_set1_ps(3.);
57255 let r = _mm_mask_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57256 a, 0, a, b,
57257 );
57258 let e = _mm_set_ps(1., 1., 1., 1.);
57259 assert_eq_m128(r, e);
57260 let r = _mm_mask_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57261 a, 0b11111111, a, b,
57262 );
57263 let e = _mm_set_ps(1., 1., 1., 8.);
57264 assert_eq_m128(r, e);
57265 }
57266
57267 #[simd_test(enable = "avx512f")]
57268 unsafe fn test_mm_maskz_scalef_round_ss() {
57269 let a = _mm_set1_ps(1.);
57270 let b = _mm_set1_ps(3.);
57271 let r =
57272 _mm_maskz_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
57273 let e = _mm_set_ps(1., 1., 1., 0.);
57274 assert_eq_m128(r, e);
57275 let r = _mm_maskz_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57276 0b11111111, a, b,
57277 );
57278 let e = _mm_set_ps(1., 1., 1., 8.);
57279 assert_eq_m128(r, e);
57280 }
57281
57282 #[simd_test(enable = "avx512f")]
57283 unsafe fn test_mm_scalef_round_sd() {
57284 let a = _mm_set1_pd(1.);
57285 let b = _mm_set1_pd(3.);
57286 let r = _mm_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
57287 let e = _mm_set_pd(1., 8.);
57288 assert_eq_m128d(r, e);
57289 }
57290
57291 #[simd_test(enable = "avx512f")]
57292 unsafe fn test_mm_mask_scalef_round_sd() {
57293 let a = _mm_set1_pd(1.);
57294 let b = _mm_set1_pd(3.);
57295 let r = _mm_mask_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57296 a, 0, a, b,
57297 );
57298 let e = _mm_set_pd(1., 1.);
57299 assert_eq_m128d(r, e);
57300 let r = _mm_mask_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57301 a, 0b11111111, a, b,
57302 );
57303 let e = _mm_set_pd(1., 8.);
57304 assert_eq_m128d(r, e);
57305 }
57306
57307 #[simd_test(enable = "avx512f")]
57308 unsafe fn test_mm_maskz_scalef_round_sd() {
57309 let a = _mm_set1_pd(1.);
57310 let b = _mm_set1_pd(3.);
57311 let r =
57312 _mm_maskz_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
57313 let e = _mm_set_pd(1., 0.);
57314 assert_eq_m128d(r, e);
57315 let r = _mm_maskz_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57316 0b11111111, a, b,
57317 );
57318 let e = _mm_set_pd(1., 8.);
57319 assert_eq_m128d(r, e);
57320 }
57321
57322 #[simd_test(enable = "avx512f")]
57323 unsafe fn test_mm_fmadd_round_ss() {
57324 let a = _mm_set1_ps(1.);
57325 let b = _mm_set1_ps(2.);
57326 let c = _mm_set1_ps(3.);
57327 let r = _mm_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
57328 let e = _mm_set_ps(1., 1., 1., 5.);
57329 assert_eq_m128(r, e);
57330 }
57331
57332 #[simd_test(enable = "avx512f")]
57333 unsafe fn test_mm_mask_fmadd_round_ss() {
57334 let a = _mm_set1_ps(1.);
57335 let b = _mm_set1_ps(2.);
57336 let c = _mm_set1_ps(3.);
57337 let r = _mm_mask_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57338 a, 0, b, c,
57339 );
57340 assert_eq_m128(r, a);
57341 let r = _mm_mask_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57342 a, 0b11111111, b, c,
57343 );
57344 let e = _mm_set_ps(1., 1., 1., 5.);
57345 assert_eq_m128(r, e);
57346 }
57347
57348 #[simd_test(enable = "avx512f")]
57349 unsafe fn test_mm_maskz_fmadd_round_ss() {
57350 let a = _mm_set1_ps(1.);
57351 let b = _mm_set1_ps(2.);
57352 let c = _mm_set1_ps(3.);
57353 let r = _mm_maskz_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57354 0, a, b, c,
57355 );
57356 let e = _mm_set_ps(1., 1., 1., 0.);
57357 assert_eq_m128(r, e);
57358 let r = _mm_maskz_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57359 0b11111111, a, b, c,
57360 );
57361 let e = _mm_set_ps(1., 1., 1., 5.);
57362 assert_eq_m128(r, e);
57363 }
57364
57365 #[simd_test(enable = "avx512f")]
57366 unsafe fn test_mm_mask3_fmadd_round_ss() {
57367 let a = _mm_set1_ps(1.);
57368 let b = _mm_set1_ps(2.);
57369 let c = _mm_set1_ps(3.);
57370 let r = _mm_mask3_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57371 a, b, c, 0,
57372 );
57373 assert_eq_m128(r, c);
57374 let r = _mm_mask3_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57375 a, b, c, 0b11111111,
57376 );
57377 let e = _mm_set_ps(3., 3., 3., 5.);
57378 assert_eq_m128(r, e);
57379 }
57380
57381 #[simd_test(enable = "avx512f")]
57382 unsafe fn test_mm_fmadd_round_sd() {
57383 let a = _mm_set1_pd(1.);
57384 let b = _mm_set1_pd(2.);
57385 let c = _mm_set1_pd(3.);
57386 let r = _mm_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
57387 let e = _mm_set_pd(1., 5.);
57388 assert_eq_m128d(r, e);
57389 }
57390
57391 #[simd_test(enable = "avx512f")]
57392 unsafe fn test_mm_mask_fmadd_round_sd() {
57393 let a = _mm_set1_pd(1.);
57394 let b = _mm_set1_pd(2.);
57395 let c = _mm_set1_pd(3.);
57396 let r = _mm_mask_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57397 a, 0, b, c,
57398 );
57399 assert_eq_m128d(r, a);
57400 let r = _mm_mask_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57401 a, 0b11111111, b, c,
57402 );
57403 let e = _mm_set_pd(1., 5.);
57404 assert_eq_m128d(r, e);
57405 }
57406
57407 #[simd_test(enable = "avx512f")]
57408 unsafe fn test_mm_maskz_fmadd_round_sd() {
57409 let a = _mm_set1_pd(1.);
57410 let b = _mm_set1_pd(2.);
57411 let c = _mm_set1_pd(3.);
57412 let r = _mm_maskz_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57413 0, a, b, c,
57414 );
57415 let e = _mm_set_pd(1., 0.);
57416 assert_eq_m128d(r, e);
57417 let r = _mm_maskz_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57418 0b11111111, a, b, c,
57419 );
57420 let e = _mm_set_pd(1., 5.);
57421 assert_eq_m128d(r, e);
57422 }
57423
57424 #[simd_test(enable = "avx512f")]
57425 unsafe fn test_mm_mask3_fmadd_round_sd() {
57426 let a = _mm_set1_pd(1.);
57427 let b = _mm_set1_pd(2.);
57428 let c = _mm_set1_pd(3.);
57429 let r = _mm_mask3_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57430 a, b, c, 0,
57431 );
57432 assert_eq_m128d(r, c);
57433 let r = _mm_mask3_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57434 a, b, c, 0b11111111,
57435 );
57436 let e = _mm_set_pd(3., 5.);
57437 assert_eq_m128d(r, e);
57438 }
57439
57440 #[simd_test(enable = "avx512f")]
57441 unsafe fn test_mm_fmsub_round_ss() {
57442 let a = _mm_set1_ps(1.);
57443 let b = _mm_set1_ps(2.);
57444 let c = _mm_set1_ps(3.);
57445 let r = _mm_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
57446 let e = _mm_set_ps(1., 1., 1., -1.);
57447 assert_eq_m128(r, e);
57448 }
57449
57450 #[simd_test(enable = "avx512f")]
57451 unsafe fn test_mm_mask_fmsub_round_ss() {
57452 let a = _mm_set1_ps(1.);
57453 let b = _mm_set1_ps(2.);
57454 let c = _mm_set1_ps(3.);
57455 let r = _mm_mask_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57456 a, 0, b, c,
57457 );
57458 assert_eq_m128(r, a);
57459 let r = _mm_mask_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57460 a, 0b11111111, b, c,
57461 );
57462 let e = _mm_set_ps(1., 1., 1., -1.);
57463 assert_eq_m128(r, e);
57464 }
57465
57466 #[simd_test(enable = "avx512f")]
57467 unsafe fn test_mm_maskz_fmsub_round_ss() {
57468 let a = _mm_set1_ps(1.);
57469 let b = _mm_set1_ps(2.);
57470 let c = _mm_set1_ps(3.);
57471 let r = _mm_maskz_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57472 0, a, b, c,
57473 );
57474 let e = _mm_set_ps(1., 1., 1., 0.);
57475 assert_eq_m128(r, e);
57476 let r = _mm_maskz_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57477 0b11111111, a, b, c,
57478 );
57479 let e = _mm_set_ps(1., 1., 1., -1.);
57480 assert_eq_m128(r, e);
57481 }
57482
57483 #[simd_test(enable = "avx512f")]
57484 unsafe fn test_mm_mask3_fmsub_round_ss() {
57485 let a = _mm_set1_ps(1.);
57486 let b = _mm_set1_ps(2.);
57487 let c = _mm_set1_ps(3.);
57488 let r = _mm_mask3_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57489 a, b, c, 0,
57490 );
57491 assert_eq_m128(r, c);
57492 let r = _mm_mask3_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57493 a, b, c, 0b11111111,
57494 );
57495 let e = _mm_set_ps(3., 3., 3., -1.);
57496 assert_eq_m128(r, e);
57497 }
57498
57499 #[simd_test(enable = "avx512f")]
57500 unsafe fn test_mm_fmsub_round_sd() {
57501 let a = _mm_set1_pd(1.);
57502 let b = _mm_set1_pd(2.);
57503 let c = _mm_set1_pd(3.);
57504 let r = _mm_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
57505 let e = _mm_set_pd(1., -1.);
57506 assert_eq_m128d(r, e);
57507 }
57508
57509 #[simd_test(enable = "avx512f")]
57510 unsafe fn test_mm_mask_fmsub_round_sd() {
57511 let a = _mm_set1_pd(1.);
57512 let b = _mm_set1_pd(2.);
57513 let c = _mm_set1_pd(3.);
57514 let r = _mm_mask_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57515 a, 0, b, c,
57516 );
57517 assert_eq_m128d(r, a);
57518 let r = _mm_mask_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57519 a, 0b11111111, b, c,
57520 );
57521 let e = _mm_set_pd(1., -1.);
57522 assert_eq_m128d(r, e);
57523 }
57524
57525 #[simd_test(enable = "avx512f")]
57526 unsafe fn test_mm_maskz_fmsub_round_sd() {
57527 let a = _mm_set1_pd(1.);
57528 let b = _mm_set1_pd(2.);
57529 let c = _mm_set1_pd(3.);
57530 let r = _mm_maskz_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57531 0, a, b, c,
57532 );
57533 let e = _mm_set_pd(1., 0.);
57534 assert_eq_m128d(r, e);
57535 let r = _mm_maskz_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57536 0b11111111, a, b, c,
57537 );
57538 let e = _mm_set_pd(1., -1.);
57539 assert_eq_m128d(r, e);
57540 }
57541
57542 #[simd_test(enable = "avx512f")]
57543 unsafe fn test_mm_mask3_fmsub_round_sd() {
57544 let a = _mm_set1_pd(1.);
57545 let b = _mm_set1_pd(2.);
57546 let c = _mm_set1_pd(3.);
57547 let r = _mm_mask3_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57548 a, b, c, 0,
57549 );
57550 assert_eq_m128d(r, c);
57551 let r = _mm_mask3_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57552 a, b, c, 0b11111111,
57553 );
57554 let e = _mm_set_pd(3., -1.);
57555 assert_eq_m128d(r, e);
57556 }
57557
57558 #[simd_test(enable = "avx512f")]
57559 unsafe fn test_mm_fnmadd_round_ss() {
57560 let a = _mm_set1_ps(1.);
57561 let b = _mm_set1_ps(2.);
57562 let c = _mm_set1_ps(3.);
57563 let r = _mm_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
57564 let e = _mm_set_ps(1., 1., 1., 1.);
57565 assert_eq_m128(r, e);
57566 }
57567
57568 #[simd_test(enable = "avx512f")]
57569 unsafe fn test_mm_mask_fnmadd_round_ss() {
57570 let a = _mm_set1_ps(1.);
57571 let b = _mm_set1_ps(2.);
57572 let c = _mm_set1_ps(3.);
57573 let r = _mm_mask_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57574 a, 0, b, c,
57575 );
57576 assert_eq_m128(r, a);
57577 let r = _mm_mask_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57578 a, 0b11111111, b, c,
57579 );
57580 let e = _mm_set_ps(1., 1., 1., 1.);
57581 assert_eq_m128(r, e);
57582 }
57583
57584 #[simd_test(enable = "avx512f")]
57585 unsafe fn test_mm_maskz_fnmadd_round_ss() {
57586 let a = _mm_set1_ps(1.);
57587 let b = _mm_set1_ps(2.);
57588 let c = _mm_set1_ps(3.);
57589 let r = _mm_maskz_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57590 0, a, b, c,
57591 );
57592 let e = _mm_set_ps(1., 1., 1., 0.);
57593 assert_eq_m128(r, e);
57594 let r = _mm_maskz_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57595 0b11111111, a, b, c,
57596 );
57597 let e = _mm_set_ps(1., 1., 1., 1.);
57598 assert_eq_m128(r, e);
57599 }
57600
57601 #[simd_test(enable = "avx512f")]
57602 unsafe fn test_mm_mask3_fnmadd_round_ss() {
57603 let a = _mm_set1_ps(1.);
57604 let b = _mm_set1_ps(2.);
57605 let c = _mm_set1_ps(3.);
57606 let r = _mm_mask3_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57607 a, b, c, 0,
57608 );
57609 assert_eq_m128(r, c);
57610 let r = _mm_mask3_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57611 a, b, c, 0b11111111,
57612 );
57613 let e = _mm_set_ps(3., 3., 3., 1.);
57614 assert_eq_m128(r, e);
57615 }
57616
57617 #[simd_test(enable = "avx512f")]
57618 unsafe fn test_mm_fnmadd_round_sd() {
57619 let a = _mm_set1_pd(1.);
57620 let b = _mm_set1_pd(2.);
57621 let c = _mm_set1_pd(3.);
57622 let r = _mm_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
57623 let e = _mm_set_pd(1., 1.);
57624 assert_eq_m128d(r, e);
57625 }
57626
57627 #[simd_test(enable = "avx512f")]
57628 unsafe fn test_mm_mask_fnmadd_round_sd() {
57629 let a = _mm_set1_pd(1.);
57630 let b = _mm_set1_pd(2.);
57631 let c = _mm_set1_pd(3.);
57632 let r = _mm_mask_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57633 a, 0, b, c,
57634 );
57635 assert_eq_m128d(r, a);
57636 let r = _mm_mask_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57637 a, 0b11111111, b, c,
57638 );
57639 let e = _mm_set_pd(1., 1.);
57640 assert_eq_m128d(r, e);
57641 }
57642
57643 #[simd_test(enable = "avx512f")]
57644 unsafe fn test_mm_maskz_fnmadd_round_sd() {
57645 let a = _mm_set1_pd(1.);
57646 let b = _mm_set1_pd(2.);
57647 let c = _mm_set1_pd(3.);
57648 let r = _mm_maskz_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57649 0, a, b, c,
57650 );
57651 let e = _mm_set_pd(1., 0.);
57652 assert_eq_m128d(r, e);
57653 let r = _mm_maskz_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57654 0b11111111, a, b, c,
57655 );
57656 let e = _mm_set_pd(1., 1.);
57657 assert_eq_m128d(r, e);
57658 }
57659
57660 #[simd_test(enable = "avx512f")]
57661 unsafe fn test_mm_mask3_fnmadd_round_sd() {
57662 let a = _mm_set1_pd(1.);
57663 let b = _mm_set1_pd(2.);
57664 let c = _mm_set1_pd(3.);
57665 let r = _mm_mask3_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57666 a, b, c, 0,
57667 );
57668 assert_eq_m128d(r, c);
57669 let r = _mm_mask3_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57670 a, b, c, 0b11111111,
57671 );
57672 let e = _mm_set_pd(3., 1.);
57673 assert_eq_m128d(r, e);
57674 }
57675
57676 #[simd_test(enable = "avx512f")]
57677 unsafe fn test_mm_fnmsub_round_ss() {
57678 let a = _mm_set1_ps(1.);
57679 let b = _mm_set1_ps(2.);
57680 let c = _mm_set1_ps(3.);
57681 let r = _mm_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
57682 let e = _mm_set_ps(1., 1., 1., -5.);
57683 assert_eq_m128(r, e);
57684 }
57685
57686 #[simd_test(enable = "avx512f")]
57687 unsafe fn test_mm_mask_fnmsub_round_ss() {
57688 let a = _mm_set1_ps(1.);
57689 let b = _mm_set1_ps(2.);
57690 let c = _mm_set1_ps(3.);
57691 let r = _mm_mask_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57692 a, 0, b, c,
57693 );
57694 assert_eq_m128(r, a);
57695 let r = _mm_mask_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57696 a, 0b11111111, b, c,
57697 );
57698 let e = _mm_set_ps(1., 1., 1., -5.);
57699 assert_eq_m128(r, e);
57700 }
57701
57702 #[simd_test(enable = "avx512f")]
57703 unsafe fn test_mm_maskz_fnmsub_round_ss() {
57704 let a = _mm_set1_ps(1.);
57705 let b = _mm_set1_ps(2.);
57706 let c = _mm_set1_ps(3.);
57707 let r = _mm_maskz_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57708 0, a, b, c,
57709 );
57710 let e = _mm_set_ps(1., 1., 1., 0.);
57711 assert_eq_m128(r, e);
57712 let r = _mm_maskz_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57713 0b11111111, a, b, c,
57714 );
57715 let e = _mm_set_ps(1., 1., 1., -5.);
57716 assert_eq_m128(r, e);
57717 }
57718
57719 #[simd_test(enable = "avx512f")]
57720 unsafe fn test_mm_mask3_fnmsub_round_ss() {
57721 let a = _mm_set1_ps(1.);
57722 let b = _mm_set1_ps(2.);
57723 let c = _mm_set1_ps(3.);
57724 let r = _mm_mask3_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57725 a, b, c, 0,
57726 );
57727 assert_eq_m128(r, c);
57728 let r = _mm_mask3_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57729 a, b, c, 0b11111111,
57730 );
57731 let e = _mm_set_ps(3., 3., 3., -5.);
57732 assert_eq_m128(r, e);
57733 }
57734
57735 #[simd_test(enable = "avx512f")]
57736 unsafe fn test_mm_fnmsub_round_sd() {
57737 let a = _mm_set1_pd(1.);
57738 let b = _mm_set1_pd(2.);
57739 let c = _mm_set1_pd(3.);
57740 let r = _mm_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
57741 let e = _mm_set_pd(1., -5.);
57742 assert_eq_m128d(r, e);
57743 }
57744
57745 #[simd_test(enable = "avx512f")]
57746 unsafe fn test_mm_mask_fnmsub_round_sd() {
57747 let a = _mm_set1_pd(1.);
57748 let b = _mm_set1_pd(2.);
57749 let c = _mm_set1_pd(3.);
57750 let r = _mm_mask_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57751 a, 0, b, c,
57752 );
57753 assert_eq_m128d(r, a);
57754 let r = _mm_mask_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57755 a, 0b11111111, b, c,
57756 );
57757 let e = _mm_set_pd(1., -5.);
57758 assert_eq_m128d(r, e);
57759 }
57760
57761 #[simd_test(enable = "avx512f")]
57762 unsafe fn test_mm_maskz_fnmsub_round_sd() {
57763 let a = _mm_set1_pd(1.);
57764 let b = _mm_set1_pd(2.);
57765 let c = _mm_set1_pd(3.);
57766 let r = _mm_maskz_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57767 0, a, b, c,
57768 );
57769 let e = _mm_set_pd(1., 0.);
57770 assert_eq_m128d(r, e);
57771 let r = _mm_maskz_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57772 0b11111111, a, b, c,
57773 );
57774 let e = _mm_set_pd(1., -5.);
57775 assert_eq_m128d(r, e);
57776 }
57777
57778 #[simd_test(enable = "avx512f")]
57779 unsafe fn test_mm_mask3_fnmsub_round_sd() {
57780 let a = _mm_set1_pd(1.);
57781 let b = _mm_set1_pd(2.);
57782 let c = _mm_set1_pd(3.);
57783 let r = _mm_mask3_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57784 a, b, c, 0,
57785 );
57786 assert_eq_m128d(r, c);
57787 let r = _mm_mask3_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57788 a, b, c, 0b11111111,
57789 );
57790 let e = _mm_set_pd(3., -5.);
57791 assert_eq_m128d(r, e);
57792 }
57793
57794 #[simd_test(enable = "avx512f")]
57795 unsafe fn test_mm_fixupimm_ss() {
57796 let a = _mm_set_ps(0., 0., 0., f32::NAN);
57797 let b = _mm_set1_ps(f32::MAX);
57798 let c = _mm_set1_epi32(i32::MAX);
57799 let r = _mm_fixupimm_ss::<5>(a, b, c);
57800 let e = _mm_set_ps(0., 0., 0., -0.0);
57801 assert_eq_m128(r, e);
57802 }
57803
57804 #[simd_test(enable = "avx512f")]
57805 unsafe fn test_mm_mask_fixupimm_ss() {
57806 let a = _mm_set_ps(0., 0., 0., f32::NAN);
57807 let b = _mm_set1_ps(f32::MAX);
57808 let c = _mm_set1_epi32(i32::MAX);
57809 let r = _mm_mask_fixupimm_ss::<5>(a, 0b11111111, b, c);
57810 let e = _mm_set_ps(0., 0., 0., -0.0);
57811 assert_eq_m128(r, e);
57812 }
57813
57814 #[simd_test(enable = "avx512f")]
57815 unsafe fn test_mm_maskz_fixupimm_ss() {
57816 let a = _mm_set_ps(0., 0., 0., f32::NAN);
57817 let b = _mm_set1_ps(f32::MAX);
57818 let c = _mm_set1_epi32(i32::MAX);
57819 let r = _mm_maskz_fixupimm_ss::<5>(0b00000000, a, b, c);
57820 let e = _mm_set_ps(0., 0., 0., 0.0);
57821 assert_eq_m128(r, e);
57822 let r = _mm_maskz_fixupimm_ss::<5>(0b11111111, a, b, c);
57823 let e = _mm_set_ps(0., 0., 0., -0.0);
57824 assert_eq_m128(r, e);
57825 }
57826
57827 #[simd_test(enable = "avx512f")]
57828 unsafe fn test_mm_fixupimm_sd() {
57829 let a = _mm_set_pd(0., f64::NAN);
57830 let b = _mm_set1_pd(f64::MAX);
57831 let c = _mm_set1_epi64x(i32::MAX as i64);
57832 let r = _mm_fixupimm_sd::<5>(a, b, c);
57833 let e = _mm_set_pd(0., -0.0);
57834 assert_eq_m128d(r, e);
57835 }
57836
57837 #[simd_test(enable = "avx512f")]
57838 unsafe fn test_mm_mask_fixupimm_sd() {
57839 let a = _mm_set_pd(0., f64::NAN);
57840 let b = _mm_set1_pd(f64::MAX);
57841 let c = _mm_set1_epi64x(i32::MAX as i64);
57842 let r = _mm_mask_fixupimm_sd::<5>(a, 0b11111111, b, c);
57843 let e = _mm_set_pd(0., -0.0);
57844 assert_eq_m128d(r, e);
57845 }
57846
57847 #[simd_test(enable = "avx512f")]
57848 unsafe fn test_mm_maskz_fixupimm_sd() {
57849 let a = _mm_set_pd(0., f64::NAN);
57850 let b = _mm_set1_pd(f64::MAX);
57851 let c = _mm_set1_epi64x(i32::MAX as i64);
57852 let r = _mm_maskz_fixupimm_sd::<5>(0b00000000, a, b, c);
57853 let e = _mm_set_pd(0., 0.0);
57854 assert_eq_m128d(r, e);
57855 let r = _mm_maskz_fixupimm_sd::<5>(0b11111111, a, b, c);
57856 let e = _mm_set_pd(0., -0.0);
57857 assert_eq_m128d(r, e);
57858 }
57859
57860 #[simd_test(enable = "avx512f")]
57861 unsafe fn test_mm_fixupimm_round_ss() {
57862 let a = _mm_set_ps(1., 0., 0., f32::NAN);
57863 let b = _mm_set1_ps(f32::MAX);
57864 let c = _mm_set1_epi32(i32::MAX);
57865 let r = _mm_fixupimm_round_ss::<5, _MM_FROUND_CUR_DIRECTION>(a, b, c);
57866 let e = _mm_set_ps(1., 0., 0., -0.0);
57867 assert_eq_m128(r, e);
57868 }
57869
57870 #[simd_test(enable = "avx512f")]
57871 unsafe fn test_mm_mask_fixupimm_round_ss() {
57872 let a = _mm_set_ps(0., 0., 0., f32::NAN);
57873 let b = _mm_set1_ps(f32::MAX);
57874 let c = _mm_set1_epi32(i32::MAX);
57875 let r = _mm_mask_fixupimm_round_ss::<5, _MM_FROUND_CUR_DIRECTION>(a, 0b11111111, b, c);
57876 let e = _mm_set_ps(0., 0., 0., -0.0);
57877 assert_eq_m128(r, e);
57878 }
57879
57880 #[simd_test(enable = "avx512f")]
57881 unsafe fn test_mm_maskz_fixupimm_round_ss() {
57882 let a = _mm_set_ps(0., 0., 0., f32::NAN);
57883 let b = _mm_set1_ps(f32::MAX);
57884 let c = _mm_set1_epi32(i32::MAX);
57885 let r = _mm_maskz_fixupimm_round_ss::<5, _MM_FROUND_CUR_DIRECTION>(0b00000000, a, b, c);
57886 let e = _mm_set_ps(0., 0., 0., 0.0);
57887 assert_eq_m128(r, e);
57888 let r = _mm_maskz_fixupimm_round_ss::<5, _MM_FROUND_CUR_DIRECTION>(0b11111111, a, b, c);
57889 let e = _mm_set_ps(0., 0., 0., -0.0);
57890 assert_eq_m128(r, e);
57891 }
57892
57893 #[simd_test(enable = "avx512f")]
57894 unsafe fn test_mm_fixupimm_round_sd() {
57895 let a = _mm_set_pd(0., f64::NAN);
57896 let b = _mm_set1_pd(f64::MAX);
57897 let c = _mm_set1_epi64x(i32::MAX as i64);
57898 let r = _mm_fixupimm_round_sd::<5, _MM_FROUND_CUR_DIRECTION>(a, b, c);
57899 let e = _mm_set_pd(0., -0.0);
57900 assert_eq_m128d(r, e);
57901 }
57902
57903 #[simd_test(enable = "avx512f")]
57904 unsafe fn test_mm_mask_fixupimm_round_sd() {
57905 let a = _mm_set_pd(0., f64::NAN);
57906 let b = _mm_set1_pd(f64::MAX);
57907 let c = _mm_set1_epi64x(i32::MAX as i64);
57908 let r = _mm_mask_fixupimm_round_sd::<5, _MM_FROUND_CUR_DIRECTION>(a, 0b11111111, b, c);
57909 let e = _mm_set_pd(0., -0.0);
57910 assert_eq_m128d(r, e);
57911 }
57912
57913 #[simd_test(enable = "avx512f")]
57914 unsafe fn test_mm_maskz_fixupimm_round_sd() {
57915 let a = _mm_set_pd(0., f64::NAN);
57916 let b = _mm_set1_pd(f64::MAX);
57917 let c = _mm_set1_epi64x(i32::MAX as i64);
57918 let r = _mm_maskz_fixupimm_round_sd::<5, _MM_FROUND_CUR_DIRECTION>(0b00000000, a, b, c);
57919 let e = _mm_set_pd(0., 0.0);
57920 assert_eq_m128d(r, e);
57921 let r = _mm_maskz_fixupimm_round_sd::<5, _MM_FROUND_CUR_DIRECTION>(0b11111111, a, b, c);
57922 let e = _mm_set_pd(0., -0.0);
57923 assert_eq_m128d(r, e);
57924 }
57925
57926 #[simd_test(enable = "avx512f")]
57927 unsafe fn test_mm_mask_cvtss_sd() {
57928 let a = _mm_set_pd(6., -7.5);
57929 let b = _mm_set_ps(0., -0.5, 1., -1.5);
57930 let r = _mm_mask_cvtss_sd(a, 0, a, b);
57931 assert_eq_m128d(r, a);
57932 let r = _mm_mask_cvtss_sd(a, 0b11111111, a, b);
57933 let e = _mm_set_pd(6., -1.5);
57934 assert_eq_m128d(r, e);
57935 }
57936
57937 #[simd_test(enable = "avx512f")]
57938 unsafe fn test_mm_maskz_cvtss_sd() {
57939 let a = _mm_set_pd(6., -7.5);
57940 let b = _mm_set_ps(0., -0.5, 1., -1.5);
57941 let r = _mm_maskz_cvtss_sd(0, a, b);
57942 let e = _mm_set_pd(6., 0.);
57943 assert_eq_m128d(r, e);
57944 let r = _mm_maskz_cvtss_sd(0b11111111, a, b);
57945 let e = _mm_set_pd(6., -1.5);
57946 assert_eq_m128d(r, e);
57947 }
57948
57949 #[simd_test(enable = "avx512f")]
57950 unsafe fn test_mm_mask_cvtsd_ss() {
57951 let a = _mm_set_ps(0., -0.5, 1., -1.5);
57952 let b = _mm_set_pd(6., -7.5);
57953 let r = _mm_mask_cvtsd_ss(a, 0, a, b);
57954 assert_eq_m128(r, a);
57955 let r = _mm_mask_cvtsd_ss(a, 0b11111111, a, b);
57956 let e = _mm_set_ps(0., -0.5, 1., -7.5);
57957 assert_eq_m128(r, e);
57958 }
57959
57960 #[simd_test(enable = "avx512f")]
57961 unsafe fn test_mm_maskz_cvtsd_ss() {
57962 let a = _mm_set_ps(0., -0.5, 1., -1.5);
57963 let b = _mm_set_pd(6., -7.5);
57964 let r = _mm_maskz_cvtsd_ss(0, a, b);
57965 let e = _mm_set_ps(0., -0.5, 1., 0.);
57966 assert_eq_m128(r, e);
57967 let r = _mm_maskz_cvtsd_ss(0b11111111, a, b);
57968 let e = _mm_set_ps(0., -0.5, 1., -7.5);
57969 assert_eq_m128(r, e);
57970 }
57971
57972 #[simd_test(enable = "avx512f")]
57973 unsafe fn test_mm_cvt_roundss_sd() {
57974 let a = _mm_set_pd(6., -7.5);
57975 let b = _mm_set_ps(0., -0.5, 1., -1.5);
57976 let r = _mm_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
57977 let e = _mm_set_pd(6., -1.5);
57978 assert_eq_m128d(r, e);
57979 }
57980
57981 #[simd_test(enable = "avx512f")]
57982 unsafe fn test_mm_mask_cvt_roundss_sd() {
57983 let a = _mm_set_pd(6., -7.5);
57984 let b = _mm_set_ps(0., -0.5, 1., -1.5);
57985 let r = _mm_mask_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
57986 assert_eq_m128d(r, a);
57987 let r = _mm_mask_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
57988 let e = _mm_set_pd(6., -1.5);
57989 assert_eq_m128d(r, e);
57990 }
57991
57992 #[simd_test(enable = "avx512f")]
57993 unsafe fn test_mm_maskz_cvt_roundss_sd() {
57994 let a = _mm_set_pd(6., -7.5);
57995 let b = _mm_set_ps(0., -0.5, 1., -1.5);
57996 let r = _mm_maskz_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
57997 let e = _mm_set_pd(6., 0.);
57998 assert_eq_m128d(r, e);
57999 let r = _mm_maskz_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
58000 let e = _mm_set_pd(6., -1.5);
58001 assert_eq_m128d(r, e);
58002 }
58003
58004 #[simd_test(enable = "avx512f")]
58005 unsafe fn test_mm_cvt_roundsd_ss() {
58006 let a = _mm_set_ps(0., -0.5, 1., -1.5);
58007 let b = _mm_set_pd(6., -7.5);
58008 let r = _mm_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58009 let e = _mm_set_ps(0., -0.5, 1., -7.5);
58010 assert_eq_m128(r, e);
58011 }
58012
58013 #[simd_test(enable = "avx512f")]
58014 unsafe fn test_mm_mask_cvt_roundsd_ss() {
58015 let a = _mm_set_ps(0., -0.5, 1., -1.5);
58016 let b = _mm_set_pd(6., -7.5);
58017 let r = _mm_mask_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, 0, a, b);
58018 assert_eq_m128(r, a);
58019 let r = _mm_mask_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58020 a, 0b11111111, a, b,
58021 );
58022 let e = _mm_set_ps(0., -0.5, 1., -7.5);
58023 assert_eq_m128(r, e);
58024 }
58025
58026 #[simd_test(enable = "avx512f")]
58027 unsafe fn test_mm_maskz_cvt_roundsd_ss() {
58028 let a = _mm_set_ps(0., -0.5, 1., -1.5);
58029 let b = _mm_set_pd(6., -7.5);
58030 let r = _mm_maskz_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
58031 let e = _mm_set_ps(0., -0.5, 1., 0.);
58032 assert_eq_m128(r, e);
58033 let r = _mm_maskz_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58034 0b11111111, a, b,
58035 );
58036 let e = _mm_set_ps(0., -0.5, 1., -7.5);
58037 assert_eq_m128(r, e);
58038 }
58039
58040 #[simd_test(enable = "avx512f")]
58041 unsafe fn test_mm_cvt_roundss_si32() {
58042 let a = _mm_set_ps(0., -0.5, 1., -1.5);
58043 let r = _mm_cvt_roundss_si32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
58044 let e: i32 = -1;
58045 assert_eq!(r, e);
58046 }
58047
58048 #[simd_test(enable = "avx512f")]
58049 unsafe fn test_mm_cvt_roundss_i32() {
58050 let a = _mm_set_ps(0., -0.5, 1., -1.5);
58051 let r = _mm_cvt_roundss_i32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
58052 let e: i32 = -1;
58053 assert_eq!(r, e);
58054 }
58055
58056 #[simd_test(enable = "avx512f")]
58057 unsafe fn test_mm_cvt_roundss_u32() {
58058 let a = _mm_set_ps(0., -0.5, 1., -1.5);
58059 let r = _mm_cvt_roundss_u32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
58060 let e: u32 = u32::MAX;
58061 assert_eq!(r, e);
58062 }
58063
58064 #[simd_test(enable = "avx512f")]
58065 unsafe fn test_mm_cvtss_i32() {
58066 let a = _mm_set_ps(0., -0.5, 1., -1.5);
58067 let r = _mm_cvtss_i32(a);
58068 let e: i32 = -2;
58069 assert_eq!(r, e);
58070 }
58071
58072 #[simd_test(enable = "avx512f")]
58073 unsafe fn test_mm_cvtss_u32() {
58074 let a = _mm_set_ps(0., -0.5, 1., -1.5);
58075 let r = _mm_cvtss_u32(a);
58076 let e: u32 = u32::MAX;
58077 assert_eq!(r, e);
58078 }
58079
58080 #[simd_test(enable = "avx512f")]
58081 unsafe fn test_mm_cvt_roundsd_si32() {
58082 let a = _mm_set_pd(1., -1.5);
58083 let r = _mm_cvt_roundsd_si32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
58084 let e: i32 = -1;
58085 assert_eq!(r, e);
58086 }
58087
58088 #[simd_test(enable = "avx512f")]
58089 unsafe fn test_mm_cvt_roundsd_i32() {
58090 let a = _mm_set_pd(1., -1.5);
58091 let r = _mm_cvt_roundsd_i32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
58092 let e: i32 = -1;
58093 assert_eq!(r, e);
58094 }
58095
58096 #[simd_test(enable = "avx512f")]
58097 unsafe fn test_mm_cvt_roundsd_u32() {
58098 let a = _mm_set_pd(1., -1.5);
58099 let r = _mm_cvt_roundsd_u32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
58100 let e: u32 = u32::MAX;
58101 assert_eq!(r, e);
58102 }
58103
58104 #[simd_test(enable = "avx512f")]
58105 unsafe fn test_mm_cvtsd_i32() {
58106 let a = _mm_set_pd(1., -1.5);
58107 let r = _mm_cvtsd_i32(a);
58108 let e: i32 = -2;
58109 assert_eq!(r, e);
58110 }
58111
58112 #[simd_test(enable = "avx512f")]
58113 unsafe fn test_mm_cvtsd_u32() {
58114 let a = _mm_set_pd(1., -1.5);
58115 let r = _mm_cvtsd_u32(a);
58116 let e: u32 = u32::MAX;
58117 assert_eq!(r, e);
58118 }
58119
58120 #[simd_test(enable = "avx512f")]
58121 unsafe fn test_mm_cvt_roundi32_ss() {
58122 let a = _mm_set_ps(0., -0.5, 1., -1.5);
58123 let b: i32 = 9;
58124 let r = _mm_cvt_roundi32_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58125 let e = _mm_set_ps(0., -0.5, 1., 9.);
58126 assert_eq_m128(r, e);
58127 }
58128
58129 #[simd_test(enable = "avx512f")]
58130 unsafe fn test_mm_cvt_roundsi32_ss() {
58131 let a = _mm_set_ps(0., -0.5, 1., -1.5);
58132 let b: i32 = 9;
58133 let r = _mm_cvt_roundsi32_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58134 let e = _mm_set_ps(0., -0.5, 1., 9.);
58135 assert_eq_m128(r, e);
58136 }
58137
58138 #[simd_test(enable = "avx512f")]
58139 unsafe fn test_mm_cvt_roundu32_ss() {
58140 let a = _mm_set_ps(0., -0.5, 1., -1.5);
58141 let b: u32 = 9;
58142 let r = _mm_cvt_roundu32_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58143 let e = _mm_set_ps(0., -0.5, 1., 9.);
58144 assert_eq_m128(r, e);
58145 }
58146
58147 #[simd_test(enable = "avx512f")]
58148 unsafe fn test_mm_cvti32_ss() {
58149 let a = _mm_set_ps(0., -0.5, 1., -1.5);
58150 let b: i32 = 9;
58151 let r = _mm_cvti32_ss(a, b);
58152 let e = _mm_set_ps(0., -0.5, 1., 9.);
58153 assert_eq_m128(r, e);
58154 }
58155
58156 #[simd_test(enable = "avx512f")]
58157 unsafe fn test_mm_cvti32_sd() {
58158 let a = _mm_set_pd(1., -1.5);
58159 let b: i32 = 9;
58160 let r = _mm_cvti32_sd(a, b);
58161 let e = _mm_set_pd(1., 9.);
58162 assert_eq_m128d(r, e);
58163 }
58164
58165 #[simd_test(enable = "avx512f")]
58166 unsafe fn test_mm_cvtt_roundss_si32() {
58167 let a = _mm_set_ps(0., -0.5, 1., -1.5);
58168 let r = _mm_cvtt_roundss_si32::<_MM_FROUND_CUR_DIRECTION>(a);
58169 let e: i32 = -2;
58170 assert_eq!(r, e);
58171 }
58172
58173 #[simd_test(enable = "avx512f")]
58174 unsafe fn test_mm_cvtt_roundss_i32() {
58175 let a = _mm_set_ps(0., -0.5, 1., -1.5);
58176 let r = _mm_cvtt_roundss_i32::<_MM_FROUND_CUR_DIRECTION>(a);
58177 let e: i32 = -2;
58178 assert_eq!(r, e);
58179 }
58180
58181 #[simd_test(enable = "avx512f")]
58182 unsafe fn test_mm_cvtt_roundss_u32() {
58183 let a = _mm_set_ps(0., -0.5, 1., -1.5);
58184 let r = _mm_cvtt_roundss_u32::<_MM_FROUND_CUR_DIRECTION>(a);
58185 let e: u32 = u32::MAX;
58186 assert_eq!(r, e);
58187 }
58188
58189 #[simd_test(enable = "avx512f")]
58190 unsafe fn test_mm_cvttss_i32() {
58191 let a = _mm_set_ps(0., -0.5, 1., -1.5);
58192 let r = _mm_cvttss_i32(a);
58193 let e: i32 = -2;
58194 assert_eq!(r, e);
58195 }
58196
58197 #[simd_test(enable = "avx512f")]
58198 unsafe fn test_mm_cvttss_u32() {
58199 let a = _mm_set_ps(0., -0.5, 1., -1.5);
58200 let r = _mm_cvttss_u32(a);
58201 let e: u32 = u32::MAX;
58202 assert_eq!(r, e);
58203 }
58204
58205 #[simd_test(enable = "avx512f")]
58206 unsafe fn test_mm_cvtt_roundsd_si32() {
58207 let a = _mm_set_pd(1., -1.5);
58208 let r = _mm_cvtt_roundsd_si32::<_MM_FROUND_CUR_DIRECTION>(a);
58209 let e: i32 = -2;
58210 assert_eq!(r, e);
58211 }
58212
58213 #[simd_test(enable = "avx512f")]
58214 unsafe fn test_mm_cvtt_roundsd_i32() {
58215 let a = _mm_set_pd(1., -1.5);
58216 let r = _mm_cvtt_roundsd_i32::<_MM_FROUND_CUR_DIRECTION>(a);
58217 let e: i32 = -2;
58218 assert_eq!(r, e);
58219 }
58220
58221 #[simd_test(enable = "avx512f")]
58222 unsafe fn test_mm_cvtt_roundsd_u32() {
58223 let a = _mm_set_pd(1., -1.5);
58224 let r = _mm_cvtt_roundsd_u32::<_MM_FROUND_CUR_DIRECTION>(a);
58225 let e: u32 = u32::MAX;
58226 assert_eq!(r, e);
58227 }
58228
58229 #[simd_test(enable = "avx512f")]
58230 unsafe fn test_mm_cvttsd_i32() {
58231 let a = _mm_set_pd(1., -1.5);
58232 let r = _mm_cvttsd_i32(a);
58233 let e: i32 = -2;
58234 assert_eq!(r, e);
58235 }
58236
58237 #[simd_test(enable = "avx512f")]
58238 unsafe fn test_mm_cvttsd_u32() {
58239 let a = _mm_set_pd(1., -1.5);
58240 let r = _mm_cvttsd_u32(a);
58241 let e: u32 = u32::MAX;
58242 assert_eq!(r, e);
58243 }
58244
58245 #[simd_test(enable = "avx512f")]
58246 unsafe fn test_mm_cvtu32_ss() {
58247 let a = _mm_set_ps(0., -0.5, 1., -1.5);
58248 let b: u32 = 9;
58249 let r = _mm_cvtu32_ss(a, b);
58250 let e = _mm_set_ps(0., -0.5, 1., 9.);
58251 assert_eq_m128(r, e);
58252 }
58253
58254 #[simd_test(enable = "avx512f")]
58255 unsafe fn test_mm_cvtu32_sd() {
58256 let a = _mm_set_pd(1., -1.5);
58257 let b: u32 = 9;
58258 let r = _mm_cvtu32_sd(a, b);
58259 let e = _mm_set_pd(1., 9.);
58260 assert_eq_m128d(r, e);
58261 }
58262
58263 #[simd_test(enable = "avx512f")]
58264 unsafe fn test_mm_comi_round_ss() {
58265 let a = _mm_set1_ps(2.2);
58266 let b = _mm_set1_ps(1.1);
58267 let r = _mm_comi_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(a, b);
58268 let e: i32 = 0;
58269 assert_eq!(r, e);
58270 }
58271
58272 #[simd_test(enable = "avx512f")]
58273 unsafe fn test_mm_comi_round_sd() {
58274 let a = _mm_set1_pd(2.2);
58275 let b = _mm_set1_pd(1.1);
58276 let r = _mm_comi_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(a, b);
58277 let e: i32 = 0;
58278 assert_eq!(r, e);
58279 }
58280
58281 #[simd_test(enable = "avx512f")]
58282 unsafe fn test_mm512_cvtsi512_si32() {
58283 let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
58284 let r = _mm512_cvtsi512_si32(a);
58285 let e: i32 = 1;
58286 assert_eq!(r, e);
58287 }
58288
58289 #[simd_test(enable = "avx512f")]
58290 unsafe fn test_mm512_shuffle_pd() {
58291 let a = _mm512_setr_pd(1., 4., 5., 8., 1., 4., 5., 8.);
58292 let b = _mm512_setr_pd(2., 3., 6., 7., 2., 3., 6., 7.);
58293 let r = _mm512_shuffle_pd::<0b11_11_11_11>(a, b);
58294 let e = _mm512_setr_pd(4., 3., 8., 7., 4., 3., 8., 7.);
58295 assert_eq_m512d(r, e);
58296 }
58297
58298 #[simd_test(enable = "avx512f")]
58299 unsafe fn test_mm512_mask_shuffle_pd() {
58300 let a = _mm512_setr_pd(1., 4., 5., 8., 1., 4., 5., 8.);
58301 let b = _mm512_setr_pd(2., 3., 6., 7., 2., 3., 6., 7.);
58302 let r = _mm512_mask_shuffle_pd::<0b11_11_11_11>(a, 0, a, b);
58303 assert_eq_m512d(r, a);
58304 let r = _mm512_mask_shuffle_pd::<0b11_11_11_11>(a, 0b11111111, a, b);
58305 let e = _mm512_setr_pd(4., 3., 8., 7., 4., 3., 8., 7.);
58306 assert_eq_m512d(r, e);
58307 }
58308
58309 #[simd_test(enable = "avx512f")]
58310 unsafe fn test_mm512_maskz_shuffle_pd() {
58311 let a = _mm512_setr_pd(1., 4., 5., 8., 1., 4., 5., 8.);
58312 let b = _mm512_setr_pd(2., 3., 6., 7., 2., 3., 6., 7.);
58313 let r = _mm512_maskz_shuffle_pd::<0b11_11_11_11>(0, a, b);
58314 assert_eq_m512d(r, _mm512_setzero_pd());
58315 let r = _mm512_maskz_shuffle_pd::<0b11_11_11_11>(0b00001111, a, b);
58316 let e = _mm512_setr_pd(4., 3., 8., 7., 0., 0., 0., 0.);
58317 assert_eq_m512d(r, e);
58318 }
58319
58320 #[simd_test(enable = "avx512f")]
58321 unsafe fn test_mm512_mask_expandloadu_epi32() {
58322 let src = _mm512_set1_epi32(42);
58323 let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
58324 let p = a.as_ptr();
58325 let m = 0b11101000_11001010;
58326 let r = _mm512_mask_expandloadu_epi32(src, m, black_box(p));
58327 let e = _mm512_set_epi32(8, 7, 6, 42, 5, 42, 42, 42, 4, 3, 42, 42, 2, 42, 1, 42);
58328 assert_eq_m512i(r, e);
58329 }
58330
58331 #[simd_test(enable = "avx512f")]
58332 unsafe fn test_mm512_maskz_expandloadu_epi32() {
58333 let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
58334 let p = a.as_ptr();
58335 let m = 0b11101000_11001010;
58336 let r = _mm512_maskz_expandloadu_epi32(m, black_box(p));
58337 let e = _mm512_set_epi32(8, 7, 6, 0, 5, 0, 0, 0, 4, 3, 0, 0, 2, 0, 1, 0);
58338 assert_eq_m512i(r, e);
58339 }
58340
58341 #[simd_test(enable = "avx512f,avx512vl")]
58342 unsafe fn test_mm256_mask_expandloadu_epi32() {
58343 let src = _mm256_set1_epi32(42);
58344 let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8];
58345 let p = a.as_ptr();
58346 let m = 0b11101000;
58347 let r = _mm256_mask_expandloadu_epi32(src, m, black_box(p));
58348 let e = _mm256_set_epi32(4, 3, 2, 42, 1, 42, 42, 42);
58349 assert_eq_m256i(r, e);
58350 }
58351
58352 #[simd_test(enable = "avx512f,avx512vl")]
58353 unsafe fn test_mm256_maskz_expandloadu_epi32() {
58354 let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8];
58355 let p = a.as_ptr();
58356 let m = 0b11101000;
58357 let r = _mm256_maskz_expandloadu_epi32(m, black_box(p));
58358 let e = _mm256_set_epi32(4, 3, 2, 0, 1, 0, 0, 0);
58359 assert_eq_m256i(r, e);
58360 }
58361
58362 #[simd_test(enable = "avx512f,avx512vl")]
58363 unsafe fn test_mm_mask_expandloadu_epi32() {
        let src = _mm_set1_epi32(42);
        let a = &[1_i32, 2, 3, 4];
        let p = a.as_ptr();
        let m = 0b11111000;
        let r = _mm_mask_expandloadu_epi32(src, m, black_box(p));
        let e = _mm_set_epi32(1, 42, 42, 42);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_expandloadu_epi32() {
        let a = &[1_i32, 2, 3, 4];
        let p = a.as_ptr();
        let m = 0b11111000;
        let r = _mm_maskz_expandloadu_epi32(m, black_box(p));
        let e = _mm_set_epi32(1, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_expandloadu_epi64() {
        let src = _mm512_set1_epi64(42);
        let a = &[1_i64, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm512_mask_expandloadu_epi64(src, m, black_box(p));
        let e = _mm512_set_epi64(4, 3, 2, 42, 1, 42, 42, 42);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_expandloadu_epi64() {
        let a = &[1_i64, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm512_maskz_expandloadu_epi64(m, black_box(p));
        let e = _mm512_set_epi64(4, 3, 2, 0, 1, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_expandloadu_epi64() {
        let src = _mm256_set1_epi64x(42);
        let a = &[1_i64, 2, 3, 4];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm256_mask_expandloadu_epi64(src, m, black_box(p));
        let e = _mm256_set_epi64x(1, 42, 42, 42);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_expandloadu_epi64() {
        let a = &[1_i64, 2, 3, 4];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm256_maskz_expandloadu_epi64(m, black_box(p));
        let e = _mm256_set_epi64x(1, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_expandloadu_epi64() {
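        // Only the low 2 mask bits apply to a 2-lane vector; both are clear here, so
        // every lane is copied from `src`.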
        let src = _mm_set1_epi64x(42);
        let a = &[1_i64, 2];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm_mask_expandloadu_epi64(src, m, black_box(p));
        let e = _mm_set_epi64x(42, 42);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_expandloadu_epi64() {
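        // The low 2 mask bits are clear, so zero-masking yields an all-zero vector.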
        let a = &[1_i64, 2];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm_maskz_expandloadu_epi64(m, black_box(p));
        let e = _mm_set_epi64x(0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_expandloadu_ps() {
        let src = _mm512_set1_ps(42.);
        let a = &[
            1.0f32, 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        ];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm512_mask_expandloadu_ps(src, m, black_box(p));
        let e = _mm512_set_ps(
            8., 7., 6., 42., 5., 42., 42., 42., 4., 3., 42., 42., 2., 42., 1., 42.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_expandloadu_ps() {
        let a = &[
            1.0f32, 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        ];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm512_maskz_expandloadu_ps(m, black_box(p));
        let e = _mm512_set_ps(
            8., 7., 6., 0., 5., 0., 0., 0., 4., 3., 0., 0., 2., 0., 1., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_expandloadu_ps() {
        let src = _mm256_set1_ps(42.);
        let a = &[1.0f32, 2., 3., 4., 5., 6., 7., 8.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm256_mask_expandloadu_ps(src, m, black_box(p));
        let e = _mm256_set_ps(4., 3., 2., 42., 1., 42., 42., 42.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_expandloadu_ps() {
        let a = &[1.0f32, 2., 3., 4., 5., 6., 7., 8.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm256_maskz_expandloadu_ps(m, black_box(p));
        let e = _mm256_set_ps(4., 3., 2., 0., 1., 0., 0., 0.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_expandloadu_ps() {
        let src = _mm_set1_ps(42.);
        let a = &[1.0f32, 2., 3., 4.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm_mask_expandloadu_ps(src, m, black_box(p));
        let e = _mm_set_ps(1., 42., 42., 42.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_expandloadu_ps() {
        let a = &[1.0f32, 2., 3., 4.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm_maskz_expandloadu_ps(m, black_box(p));
        let e = _mm_set_ps(1., 0., 0., 0.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_expandloadu_pd() {
        let src = _mm512_set1_pd(42.);
        let a = &[1.0f64, 2., 3., 4., 5., 6., 7., 8.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm512_mask_expandloadu_pd(src, m, black_box(p));
        let e = _mm512_set_pd(4., 3., 2., 42., 1., 42., 42., 42.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_expandloadu_pd() {
        let a = &[1.0f64, 2., 3., 4., 5., 6., 7., 8.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm512_maskz_expandloadu_pd(m, black_box(p));
        let e = _mm512_set_pd(4., 3., 2., 0., 1., 0., 0., 0.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_expandloadu_pd() {
        let src = _mm256_set1_pd(42.);
        let a = &[1.0f64, 2., 3., 4.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm256_mask_expandloadu_pd(src, m, black_box(p));
        let e = _mm256_set_pd(1., 42., 42., 42.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_expandloadu_pd() {
        let a = &[1.0f64, 2., 3., 4.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm256_maskz_expandloadu_pd(m, black_box(p));
        let e = _mm256_set_pd(1., 0., 0., 0.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_expandloadu_pd() {
        let src = _mm_set1_pd(42.);
        let a = &[1.0f64, 2.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm_mask_expandloadu_pd(src, m, black_box(p));
        let e = _mm_set_pd(42., 42.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_expandloadu_pd() {
        let a = &[1.0f64, 2.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm_maskz_expandloadu_pd(m, black_box(p));
        let e = _mm_set_pd(0., 0.);
        assert_eq_m128d(r, e);
    }
}