use crate::{
    arch::asm,
    core_arch::{simd::*, x86::*},
    intrinsics::simd::*,
    intrinsics::{fmaf32, fmaf64},
    mem, ptr,
};

use core::hint::unreachable_unchecked;
#[cfg(test)]
use stdarch_test::assert_instr;

/// Computes the absolute values of packed 32-bit integers in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_epi32&expand=39)
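///
/// # Examples
///
/// A minimal usage sketch with illustrative values; it assumes a nightly
/// toolchain with the unstable `stdarch_x86_avx512` feature and an
/// AVX-512F-capable CPU, so it is not compiled as a doc test:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm512_set1_epi32(-7);
///     let r = _mm512_abs_epi32(a);
///     // every one of the 16 lanes of `r` now holds 7
/// }
/// ```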
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub fn _mm512_abs_epi32(a: __m512i) -> __m512i {
    unsafe {
        let a: i32x16 = a.as_i32x16();
        let r: i32x16 = simd_select::<i32x16, _>(simd_lt(a, i32x16::ZERO), simd_neg(a), a);
        transmute(r)
    }
}

/// Computes the absolute value of packed 32-bit integers in `a`, and stores the
/// unsigned results in `dst` using writemask `k` (elements are copied from
/// `src` when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_epi32&expand=40)
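///
/// # Examples
///
/// A minimal sketch of the writemask behaviour (illustrative values; assumes
/// nightly, the unstable `stdarch_x86_avx512` feature and an AVX-512F-capable
/// CPU, so it is not compiled as a doc test):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let src = _mm512_set1_epi32(1);
///     let a = _mm512_set1_epi32(-7);
///     // only the two lowest lanes are selected by the mask
///     let r = _mm512_mask_abs_epi32(src, 0b0000_0000_0000_0011, a);
///     // lanes 0 and 1 of `r` hold 7, the remaining lanes are copied from `src` (1)
/// }
/// ```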
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub fn _mm512_mask_abs_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
    unsafe {
        let abs: i32x16 = _mm512_abs_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, abs, src.as_i32x16()))
    }
}

/// Computes the absolute value of packed 32-bit integers in `a`, and stores the
/// unsigned results in `dst` using zeromask `k` (elements are zeroed out when
/// the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_abs_epi32&expand=41)
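///
/// # Examples
///
/// A minimal sketch of the zeromask behaviour (illustrative values; assumes
/// nightly, the unstable `stdarch_x86_avx512` feature and an AVX-512F-capable
/// CPU, so it is not compiled as a doc test):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm512_set1_epi32(-7);
///     let r = _mm512_maskz_abs_epi32(0b0000_0000_0000_1111, a);
///     // the four lowest lanes of `r` hold 7, the remaining twelve lanes are zeroed
/// }
/// ```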
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub fn _mm512_maskz_abs_epi32(k: __mmask16, a: __m512i) -> __m512i {
    unsafe {
        let abs: i32x16 = _mm512_abs_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, abs, i32x16::ZERO))
    }
}

/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_abs_epi32&expand=37)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub fn _mm256_mask_abs_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let abs: i32x8 = _mm256_abs_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, abs, src.as_i32x8()))
    }
}

/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_abs_epi32&expand=38)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub fn _mm256_maskz_abs_epi32(k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let abs: i32x8 = _mm256_abs_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, abs, i32x8::ZERO))
    }
}

/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_abs_epi32&expand=34)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub fn _mm_mask_abs_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let abs: i32x4 = _mm_abs_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, abs, src.as_i32x4()))
    }
}

/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_abs_epi32&expand=35)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub fn _mm_maskz_abs_epi32(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let abs: i32x4 = _mm_abs_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, abs, i32x4::ZERO))
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_epi64&expand=48)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm512_abs_epi64(a: __m512i) -> __m512i {
    unsafe {
        let a: i64x8 = a.as_i64x8();
        let r: i64x8 = simd_select::<i64x8, _>(simd_lt(a, i64x8::ZERO), simd_neg(a), a);
        transmute(r)
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_epi64&expand=49)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm512_mask_abs_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
    unsafe {
        let abs: i64x8 = _mm512_abs_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, abs, src.as_i64x8()))
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_abs_epi64&expand=50)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm512_maskz_abs_epi64(k: __mmask8, a: __m512i) -> __m512i {
    unsafe {
        let abs: i64x8 = _mm512_abs_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, abs, i64x8::ZERO))
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_abs_epi64&expand=45)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm256_abs_epi64(a: __m256i) -> __m256i {
    unsafe {
        let a: i64x4 = a.as_i64x4();
        let r: i64x4 = simd_select::<i64x4, _>(simd_lt(a, i64x4::ZERO), simd_neg(a), a);
        transmute(r)
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_abs_epi64&expand=46)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm256_mask_abs_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let abs: i64x4 = _mm256_abs_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, abs, src.as_i64x4()))
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_abs_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm256_maskz_abs_epi64(k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let abs: i64x4 = _mm256_abs_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, abs, i64x4::ZERO))
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_abs_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm_abs_epi64(a: __m128i) -> __m128i {
    unsafe {
        let a: i64x2 = a.as_i64x2();
        let r: i64x2 = simd_select::<i64x2, _>(simd_lt(a, i64x2::ZERO), simd_neg(a), a);
        transmute(r)
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_abs_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm_mask_abs_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let abs: i64x2 = _mm_abs_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, abs, src.as_i64x2()))
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_abs_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm_maskz_abs_epi64(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let abs: i64x2 = _mm_abs_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, abs, i64x2::ZERO))
    }
}

/// Finds the absolute value of each packed single-precision (32-bit) floating-point element in v2, storing the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_ps&expand=65)
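///
/// # Examples
///
/// A minimal usage sketch with illustrative values (assumes nightly, the
/// unstable `stdarch_x86_avx512` feature and an AVX-512F-capable CPU, so it
/// is not compiled as a doc test):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let v = _mm512_set1_ps(-0.5);
///     let r = _mm512_abs_ps(v);
///     // every lane of `r` is 0.5
/// }
/// ```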
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpandd))]
pub fn _mm512_abs_ps(v2: __m512) -> __m512 {
    unsafe { simd_fabs(v2) }
}

/// Finds the absolute value of each packed single-precision (32-bit) floating-point element in v2, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_ps&expand=66)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpandd))]
pub fn _mm512_mask_abs_ps(src: __m512, k: __mmask16, v2: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, simd_fabs(v2), src) }
}

/// Finds the absolute value of each packed double-precision (64-bit) floating-point element in v2, storing the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_pd&expand=60)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpandq))]
pub fn _mm512_abs_pd(v2: __m512d) -> __m512d {
    unsafe { simd_fabs(v2) }
}

/// Finds the absolute value of each packed double-precision (64-bit) floating-point element in v2, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_pd&expand=61)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpandq))]
pub fn _mm512_mask_abs_pd(src: __m512d, k: __mmask8, v2: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, simd_fabs(v2), src) }
}

/// Move packed 32-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_epi32&expand=3801)
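///
/// # Examples
///
/// A minimal sketch of a masked move (illustrative values; assumes nightly,
/// the unstable `stdarch_x86_avx512` feature and an AVX-512F-capable CPU, so
/// it is not compiled as a doc test):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let src = _mm512_set1_epi32(0);
///     let a = _mm512_set1_epi32(9);
///     let r = _mm512_mask_mov_epi32(src, 0b1111_1111_0000_0000, a);
///     // the upper eight lanes of `r` come from `a` (9), the lower eight from `src` (0)
/// }
/// ```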
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub fn _mm512_mask_mov_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
    unsafe {
        let mov: i32x16 = a.as_i32x16();
        transmute(simd_select_bitmask(k, mov, src.as_i32x16()))
    }
}

/// Move packed 32-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_epi32&expand=3802)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub fn _mm512_maskz_mov_epi32(k: __mmask16, a: __m512i) -> __m512i {
    unsafe {
        let mov: i32x16 = a.as_i32x16();
        transmute(simd_select_bitmask(k, mov, i32x16::ZERO))
    }
}

/// Move packed 32-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_epi32&expand=3799)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub fn _mm256_mask_mov_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let mov: i32x8 = a.as_i32x8();
        transmute(simd_select_bitmask(k, mov, src.as_i32x8()))
    }
}

/// Move packed 32-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_epi32&expand=3800)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub fn _mm256_maskz_mov_epi32(k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let mov: i32x8 = a.as_i32x8();
        transmute(simd_select_bitmask(k, mov, i32x8::ZERO))
    }
}

/// Move packed 32-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_epi32&expand=3797)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub fn _mm_mask_mov_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let mov: i32x4 = a.as_i32x4();
        transmute(simd_select_bitmask(k, mov, src.as_i32x4()))
    }
}

/// Move packed 32-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_epi32&expand=3798)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub fn _mm_maskz_mov_epi32(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let mov: i32x4 = a.as_i32x4();
        transmute(simd_select_bitmask(k, mov, i32x4::ZERO))
    }
}

/// Move packed 64-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_epi64&expand=3807)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub fn _mm512_mask_mov_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
    unsafe {
        let mov: i64x8 = a.as_i64x8();
        transmute(simd_select_bitmask(k, mov, src.as_i64x8()))
    }
}

/// Move packed 64-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_epi64&expand=3808)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub fn _mm512_maskz_mov_epi64(k: __mmask8, a: __m512i) -> __m512i {
    unsafe {
        let mov: i64x8 = a.as_i64x8();
        transmute(simd_select_bitmask(k, mov, i64x8::ZERO))
    }
}

/// Move packed 64-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_epi64&expand=3805)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub fn _mm256_mask_mov_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let mov: i64x4 = a.as_i64x4();
        transmute(simd_select_bitmask(k, mov, src.as_i64x4()))
    }
}

/// Move packed 64-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_epi64&expand=3806)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub fn _mm256_maskz_mov_epi64(k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let mov: i64x4 = a.as_i64x4();
        transmute(simd_select_bitmask(k, mov, i64x4::ZERO))
    }
}

/// Move packed 64-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_epi64&expand=3803)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub fn _mm_mask_mov_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let mov: i64x2 = a.as_i64x2();
        transmute(simd_select_bitmask(k, mov, src.as_i64x2()))
    }
}

/// Move packed 64-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_epi64&expand=3804)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub fn _mm_maskz_mov_epi64(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let mov: i64x2 = a.as_i64x2();
        transmute(simd_select_bitmask(k, mov, i64x2::ZERO))
    }
}

/// Move packed single-precision (32-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_ps&expand=3825)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub fn _mm512_mask_mov_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    unsafe {
        let mov: f32x16 = a.as_f32x16();
        transmute(simd_select_bitmask(k, mov, src.as_f32x16()))
    }
}

/// Move packed single-precision (32-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_ps&expand=3826)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub fn _mm512_maskz_mov_ps(k: __mmask16, a: __m512) -> __m512 {
    unsafe {
        let mov: f32x16 = a.as_f32x16();
        transmute(simd_select_bitmask(k, mov, f32x16::ZERO))
    }
}

/// Move packed single-precision (32-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_ps&expand=3823)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub fn _mm256_mask_mov_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
    unsafe {
        let mov: f32x8 = a.as_f32x8();
        transmute(simd_select_bitmask(k, mov, src.as_f32x8()))
    }
}

/// Move packed single-precision (32-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_ps&expand=3824)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub fn _mm256_maskz_mov_ps(k: __mmask8, a: __m256) -> __m256 {
    unsafe {
        let mov: f32x8 = a.as_f32x8();
        transmute(simd_select_bitmask(k, mov, f32x8::ZERO))
    }
}

/// Move packed single-precision (32-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_ps&expand=3821)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub fn _mm_mask_mov_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    unsafe {
        let mov: f32x4 = a.as_f32x4();
        transmute(simd_select_bitmask(k, mov, src.as_f32x4()))
    }
}

/// Move packed single-precision (32-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_ps&expand=3822)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub fn _mm_maskz_mov_ps(k: __mmask8, a: __m128) -> __m128 {
    unsafe {
        let mov: f32x4 = a.as_f32x4();
        transmute(simd_select_bitmask(k, mov, f32x4::ZERO))
    }
}

/// Move packed double-precision (64-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_pd&expand=3819)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub fn _mm512_mask_mov_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    unsafe {
        let mov: f64x8 = a.as_f64x8();
        transmute(simd_select_bitmask(k, mov, src.as_f64x8()))
    }
}

/// Move packed double-precision (64-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_pd&expand=3820)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub fn _mm512_maskz_mov_pd(k: __mmask8, a: __m512d) -> __m512d {
    unsafe {
        let mov: f64x8 = a.as_f64x8();
        transmute(simd_select_bitmask(k, mov, f64x8::ZERO))
    }
}

/// Move packed double-precision (64-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_pd&expand=3817)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub fn _mm256_mask_mov_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
    unsafe {
        let mov: f64x4 = a.as_f64x4();
        transmute(simd_select_bitmask(k, mov, src.as_f64x4()))
    }
}

/// Move packed double-precision (64-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_pd&expand=3818)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub fn _mm256_maskz_mov_pd(k: __mmask8, a: __m256d) -> __m256d {
    unsafe {
        let mov: f64x4 = a.as_f64x4();
        transmute(simd_select_bitmask(k, mov, f64x4::ZERO))
    }
}

/// Move packed double-precision (64-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_pd&expand=3815)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub fn _mm_mask_mov_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
    unsafe {
        let mov: f64x2 = a.as_f64x2();
        transmute(simd_select_bitmask(k, mov, src.as_f64x2()))
    }
}

/// Move packed double-precision (64-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_pd&expand=3816)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub fn _mm_maskz_mov_pd(k: __mmask8, a: __m128d) -> __m128d {
    unsafe {
        let mov: f64x2 = a.as_f64x2();
        transmute(simd_select_bitmask(k, mov, f64x2::ZERO))
    }
}

/// Add packed 32-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_epi32&expand=100)
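///
/// # Examples
///
/// A minimal usage sketch with illustrative values (assumes nightly, the
/// unstable `stdarch_x86_avx512` feature and an AVX-512F-capable CPU, so it
/// is not compiled as a doc test):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm512_set1_epi32(3);
///     let b = _mm512_set1_epi32(4);
///     let r = _mm512_add_epi32(a, b);
///     // every lane of `r` is 7 (the addition wraps on overflow)
/// }
/// ```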
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub fn _mm512_add_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_add(a.as_i32x16(), b.as_i32x16())) }
}

/// Add packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_epi32&expand=101)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub fn _mm512_mask_add_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add: i32x16 = _mm512_add_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, add, src.as_i32x16()))
    }
}

/// Add packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_epi32&expand=102)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub fn _mm512_maskz_add_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add: i32x16 = _mm512_add_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, add, i32x16::ZERO))
    }
}

/// Add packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_epi32&expand=98)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub fn _mm256_mask_add_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add: i32x8 = _mm256_add_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, add, src.as_i32x8()))
    }
}

/// Add packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_epi32&expand=99)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub fn _mm256_maskz_add_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add: i32x8 = _mm256_add_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, add, i32x8::ZERO))
    }
}

/// Add packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_epi32&expand=95)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub fn _mm_mask_add_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add: i32x4 = _mm_add_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, add, src.as_i32x4()))
    }
}

/// Add packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_epi32&expand=96)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub fn _mm_maskz_add_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add: i32x4 = _mm_add_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, add, i32x4::ZERO))
    }
}

/// Add packed 64-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_epi64&expand=109)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub fn _mm512_add_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_add(a.as_i64x8(), b.as_i64x8())) }
}

/// Add packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_epi64&expand=110)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub fn _mm512_mask_add_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add: i64x8 = _mm512_add_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, add, src.as_i64x8()))
    }
}

/// Add packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_epi64&expand=111)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub fn _mm512_maskz_add_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add: i64x8 = _mm512_add_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, add, i64x8::ZERO))
    }
}

/// Add packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_epi64&expand=107)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub fn _mm256_mask_add_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add: i64x4 = _mm256_add_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, add, src.as_i64x4()))
    }
}

/// Add packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_epi64&expand=108)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub fn _mm256_maskz_add_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add: i64x4 = _mm256_add_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, add, i64x4::ZERO))
    }
}

/// Add packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_epi64&expand=104)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub fn _mm_mask_add_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add: i64x2 = _mm_add_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, add, src.as_i64x2()))
    }
}

/// Add packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_epi64&expand=105)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub fn _mm_maskz_add_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add: i64x2 = _mm_add_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, add, i64x2::ZERO))
    }
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_ps&expand=139)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddps))]
pub fn _mm512_add_ps(a: __m512, b: __m512) -> __m512 {
    unsafe { transmute(simd_add(a.as_f32x16(), b.as_f32x16())) }
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_ps&expand=140)
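///
/// # Examples
///
/// A minimal sketch of the writemask behaviour (illustrative values; assumes
/// nightly, the unstable `stdarch_x86_avx512` feature and an AVX-512F-capable
/// CPU, so it is not compiled as a doc test):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let src = _mm512_set1_ps(0.0);
///     let a = _mm512_set1_ps(1.5);
///     let b = _mm512_set1_ps(2.5);
///     let r = _mm512_mask_add_ps(src, 0b0000_0000_1111_1111, a, b);
///     // the lower eight lanes of `r` hold 4.0, the upper eight are copied from `src` (0.0)
/// }
/// ```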
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddps))]
pub fn _mm512_mask_add_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let add: f32x16 = _mm512_add_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, add, src.as_f32x16()))
    }
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_ps&expand=141)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddps))]
pub fn _mm512_maskz_add_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let add: f32x16 = _mm512_add_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, add, f32x16::ZERO))
    }
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_ps&expand=137)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddps))]
pub fn _mm256_mask_add_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let add: f32x8 = _mm256_add_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, add, src.as_f32x8()))
    }
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_ps&expand=138)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddps))]
pub fn _mm256_maskz_add_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let add: f32x8 = _mm256_add_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, add, f32x8::ZERO))
    }
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_ps&expand=134)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddps))]
pub fn _mm_mask_add_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let add: f32x4 = _mm_add_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, add, src.as_f32x4()))
    }
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_ps&expand=135)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddps))]
pub fn _mm_maskz_add_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let add: f32x4 = _mm_add_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, add, f32x4::ZERO))
    }
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_pd&expand=127)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub fn _mm512_add_pd(a: __m512d, b: __m512d) -> __m512d {
    unsafe { transmute(simd_add(a.as_f64x8(), b.as_f64x8())) }
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_pd&expand=128)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub fn _mm512_mask_add_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let add: f64x8 = _mm512_add_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, add, src.as_f64x8()))
    }
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_pd&expand=129)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub fn _mm512_maskz_add_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let add: f64x8 = _mm512_add_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, add, f64x8::ZERO))
    }
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_pd&expand=125)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub fn _mm256_mask_add_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let add: f64x4 = _mm256_add_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, add, src.as_f64x4()))
    }
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_pd&expand=126)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub fn _mm256_maskz_add_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let add: f64x4 = _mm256_add_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, add, f64x4::ZERO))
    }
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_pd&expand=122)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub fn _mm_mask_add_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let add: f64x2 = _mm_add_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, add, src.as_f64x2()))
    }
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_pd&expand=123)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub fn _mm_maskz_add_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let add: f64x2 = _mm_add_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, add, f64x2::ZERO))
    }
}

/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_epi32&expand=5694)
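///
/// # Examples
///
/// A minimal usage sketch with illustrative values (assumes nightly, the
/// unstable `stdarch_x86_avx512` feature and an AVX-512F-capable CPU, so it
/// is not compiled as a doc test):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm512_set1_epi32(10);
///     let b = _mm512_set1_epi32(3);
///     let r = _mm512_sub_epi32(a, b);
///     // every lane of `r` is 7 (the subtraction wraps on overflow)
/// }
/// ```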
1008#[inline]
1009#[target_feature(enable = "avx512f")]
1010#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1011#[cfg_attr(test, assert_instr(vpsubd))]
1012pub fn _mm512_sub_epi32(a: __m512i, b: __m512i) -> __m512i {
1013 unsafe { transmute(src:simd_sub(lhs:a.as_i32x16(), rhs:b.as_i32x16())) }
1014}
1015
1016/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1017///
1018/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_epi32&expand=5692)
1019#[inline]
1020#[target_feature(enable = "avx512f")]
1021#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1022#[cfg_attr(test, assert_instr(vpsubd))]
1023pub fn _mm512_mask_sub_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
1024 unsafe {
1025 let sub: i32x16 = _mm512_sub_epi32(a, b).as_i32x16();
1026 transmute(src:simd_select_bitmask(m:k, yes:sub, no:src.as_i32x16()))
1027 }
1028}
1029
1030/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1031///
1032/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_epi32&expand=5693)
1033#[inline]
1034#[target_feature(enable = "avx512f")]
1035#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1036#[cfg_attr(test, assert_instr(vpsubd))]
1037pub fn _mm512_maskz_sub_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
1038 unsafe {
1039 let sub: i32x16 = _mm512_sub_epi32(a, b).as_i32x16();
1040 transmute(src:simd_select_bitmask(m:k, yes:sub, no:i32x16::ZERO))
1041 }
1042}
1043
1044/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1045///
1046/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_epi32&expand=5689)
1047#[inline]
1048#[target_feature(enable = "avx512f,avx512vl")]
1049#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1050#[cfg_attr(test, assert_instr(vpsubd))]
1051pub fn _mm256_mask_sub_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1052 unsafe {
1053 let sub: i32x8 = _mm256_sub_epi32(a, b).as_i32x8();
1054 transmute(src:simd_select_bitmask(m:k, yes:sub, no:src.as_i32x8()))
1055 }
1056}
1057
1058/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1059///
1060/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_epi32&expand=5690)
1061#[inline]
1062#[target_feature(enable = "avx512f,avx512vl")]
1063#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1064#[cfg_attr(test, assert_instr(vpsubd))]
1065pub fn _mm256_maskz_sub_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1066 unsafe {
1067 let sub: i32x8 = _mm256_sub_epi32(a, b).as_i32x8();
1068 transmute(src:simd_select_bitmask(m:k, yes:sub, no:i32x8::ZERO))
1069 }
1070}
1071
1072/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1073///
1074/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_epi32&expand=5686)
1075#[inline]
1076#[target_feature(enable = "avx512f,avx512vl")]
1077#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1078#[cfg_attr(test, assert_instr(vpsubd))]
1079pub fn _mm_mask_sub_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1080 unsafe {
1081 let sub: i32x4 = _mm_sub_epi32(a, b).as_i32x4();
1082 transmute(src:simd_select_bitmask(m:k, yes:sub, no:src.as_i32x4()))
1083 }
1084}
1085
1086/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1087///
1088/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_epi32&expand=5687)
1089#[inline]
1090#[target_feature(enable = "avx512f,avx512vl")]
1091#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1092#[cfg_attr(test, assert_instr(vpsubd))]
1093pub fn _mm_maskz_sub_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1094 unsafe {
1095 let sub: i32x4 = _mm_sub_epi32(a, b).as_i32x4();
1096 transmute(src:simd_select_bitmask(m:k, yes:sub, no:i32x4::ZERO))
1097 }
1098}
1099
1100/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst.
1101///
1102/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_epi64&expand=5703)
1103#[inline]
1104#[target_feature(enable = "avx512f")]
1105#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1106#[cfg_attr(test, assert_instr(vpsubq))]
1107pub fn _mm512_sub_epi64(a: __m512i, b: __m512i) -> __m512i {
1108 unsafe { transmute(src:simd_sub(lhs:a.as_i64x8(), rhs:b.as_i64x8())) }
1109}
1110
1111/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1112///
1113/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_epi64&expand=5701)
1114#[inline]
1115#[target_feature(enable = "avx512f")]
1116#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1117#[cfg_attr(test, assert_instr(vpsubq))]
1118pub fn _mm512_mask_sub_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1119 unsafe {
1120 let sub: i64x8 = _mm512_sub_epi64(a, b).as_i64x8();
1121 transmute(src:simd_select_bitmask(m:k, yes:sub, no:src.as_i64x8()))
1122 }
1123}
1124
1125/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1126///
1127/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_epi64&expand=5702)
1128#[inline]
1129#[target_feature(enable = "avx512f")]
1130#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1131#[cfg_attr(test, assert_instr(vpsubq))]
1132pub fn _mm512_maskz_sub_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1133 unsafe {
1134 let sub: i64x8 = _mm512_sub_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, sub, i64x8::ZERO))
1136 }
1137}
1138
1139/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1140///
1141/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_epi64&expand=5698)
1142#[inline]
1143#[target_feature(enable = "avx512f,avx512vl")]
1144#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1145#[cfg_attr(test, assert_instr(vpsubq))]
1146pub fn _mm256_mask_sub_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1147 unsafe {
1148 let sub: i64x4 = _mm256_sub_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, sub, src.as_i64x4()))
1150 }
1151}
1152
1153/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1154///
1155/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_epi64&expand=5699)
1156#[inline]
1157#[target_feature(enable = "avx512f,avx512vl")]
1158#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1159#[cfg_attr(test, assert_instr(vpsubq))]
1160pub fn _mm256_maskz_sub_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1161 unsafe {
1162 let sub: i64x4 = _mm256_sub_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, sub, i64x4::ZERO))
1164 }
1165}
1166
1167/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1168///
1169/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_epi64&expand=5695)
1170#[inline]
1171#[target_feature(enable = "avx512f,avx512vl")]
1172#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1173#[cfg_attr(test, assert_instr(vpsubq))]
1174pub fn _mm_mask_sub_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1175 unsafe {
1176 let sub: i64x2 = _mm_sub_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, sub, src.as_i64x2()))
1178 }
1179}
1180
1181/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1182///
1183/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_epi64&expand=5696)
1184#[inline]
1185#[target_feature(enable = "avx512f,avx512vl")]
1186#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1187#[cfg_attr(test, assert_instr(vpsubq))]
1188pub fn _mm_maskz_sub_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1189 unsafe {
1190 let sub: i64x2 = _mm_sub_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, sub, i64x2::ZERO))
1192 }
1193}
1194
1195/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst.
1196///
1197/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_ps&expand=5733)
1198#[inline]
1199#[target_feature(enable = "avx512f")]
1200#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1201#[cfg_attr(test, assert_instr(vsubps))]
1202pub fn _mm512_sub_ps(a: __m512, b: __m512) -> __m512 {
    unsafe { transmute(simd_sub(a.as_f32x16(), b.as_f32x16())) }
1204}
1205
1206/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1207///
1208/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_ps&expand=5731)
1209#[inline]
1210#[target_feature(enable = "avx512f")]
1211#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1212#[cfg_attr(test, assert_instr(vsubps))]
1213pub fn _mm512_mask_sub_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
1214 unsafe {
1215 let sub: f32x16 = _mm512_sub_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, sub, src.as_f32x16()))
1217 }
1218}
1219
1220/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1221///
1222/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_ps&expand=5732)
1223#[inline]
1224#[target_feature(enable = "avx512f")]
1225#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1226#[cfg_attr(test, assert_instr(vsubps))]
1227pub fn _mm512_maskz_sub_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
1228 unsafe {
1229 let sub: f32x16 = _mm512_sub_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, sub, f32x16::ZERO))
1231 }
1232}
1233
1234/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1235///
1236/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_ps&expand=5728)
1237#[inline]
1238#[target_feature(enable = "avx512f,avx512vl")]
1239#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1240#[cfg_attr(test, assert_instr(vsubps))]
1241pub fn _mm256_mask_sub_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
1242 unsafe {
1243 let sub: f32x8 = _mm256_sub_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, sub, src.as_f32x8()))
1245 }
1246}
1247
1248/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1249///
1250/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_ps&expand=5729)
1251#[inline]
1252#[target_feature(enable = "avx512f,avx512vl")]
1253#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1254#[cfg_attr(test, assert_instr(vsubps))]
1255pub fn _mm256_maskz_sub_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
1256 unsafe {
1257 let sub: f32x8 = _mm256_sub_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, sub, f32x8::ZERO))
1259 }
1260}
1261
1262/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1263///
1264/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_ps&expand=5725)
1265#[inline]
1266#[target_feature(enable = "avx512f,avx512vl")]
1267#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1268#[cfg_attr(test, assert_instr(vsubps))]
1269pub fn _mm_mask_sub_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
1270 unsafe {
1271 let sub: f32x4 = _mm_sub_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, sub, src.as_f32x4()))
1273 }
1274}
1275
1276/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1277///
1278/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_ps&expand=5726)
1279#[inline]
1280#[target_feature(enable = "avx512f,avx512vl")]
1281#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1282#[cfg_attr(test, assert_instr(vsubps))]
1283pub fn _mm_maskz_sub_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
1284 unsafe {
1285 let sub: f32x4 = _mm_sub_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, sub, f32x4::ZERO))
1287 }
1288}
1289
1290/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst.
1291///
1292/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_pd&expand=5721)
1293#[inline]
1294#[target_feature(enable = "avx512f")]
1295#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1296#[cfg_attr(test, assert_instr(vsubpd))]
1297pub fn _mm512_sub_pd(a: __m512d, b: __m512d) -> __m512d {
    unsafe { transmute(simd_sub(a.as_f64x8(), b.as_f64x8())) }
1299}
1300
1301/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1302///
1303/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_pd&expand=5719)
1304#[inline]
1305#[target_feature(enable = "avx512f")]
1306#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1307#[cfg_attr(test, assert_instr(vsubpd))]
1308pub fn _mm512_mask_sub_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
1309 unsafe {
1310 let sub: f64x8 = _mm512_sub_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, sub, src.as_f64x8()))
1312 }
1313}
1314
1315/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1316///
1317/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_pd&expand=5720)
1318#[inline]
1319#[target_feature(enable = "avx512f")]
1320#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1321#[cfg_attr(test, assert_instr(vsubpd))]
1322pub fn _mm512_maskz_sub_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
1323 unsafe {
1324 let sub: f64x8 = _mm512_sub_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, sub, f64x8::ZERO))
1326 }
1327}
1328
1329/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1330///
1331/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_pd&expand=5716)
1332#[inline]
1333#[target_feature(enable = "avx512f,avx512vl")]
1334#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1335#[cfg_attr(test, assert_instr(vsubpd))]
1336pub fn _mm256_mask_sub_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
1337 unsafe {
1338 let sub: f64x4 = _mm256_sub_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, sub, src.as_f64x4()))
1340 }
1341}
1342
1343/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1344///
1345/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_pd&expand=5717)
1346#[inline]
1347#[target_feature(enable = "avx512f,avx512vl")]
1348#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1349#[cfg_attr(test, assert_instr(vsubpd))]
1350pub fn _mm256_maskz_sub_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
1351 unsafe {
1352 let sub: f64x4 = _mm256_sub_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, sub, f64x4::ZERO))
1354 }
1355}
1356
1357/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1358///
1359/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_pd&expand=5713)
1360#[inline]
1361#[target_feature(enable = "avx512f,avx512vl")]
1362#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1363#[cfg_attr(test, assert_instr(vsubpd))]
1364pub fn _mm_mask_sub_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
1365 unsafe {
1366 let sub: f64x2 = _mm_sub_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, sub, src.as_f64x2()))
1368 }
1369}
1370
1371/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1372///
1373/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_pd&expand=5714)
1374#[inline]
1375#[target_feature(enable = "avx512f,avx512vl")]
1376#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1377#[cfg_attr(test, assert_instr(vsubpd))]
1378pub fn _mm_maskz_sub_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
1379 unsafe {
1380 let sub: f64x2 = _mm_sub_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, sub, f64x2::ZERO))
1382 }
1383}
1384
1385/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst.
1386///
1387/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_epi32&expand=3907)
1388#[inline]
1389#[target_feature(enable = "avx512f")]
1390#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1391#[cfg_attr(test, assert_instr(vpmuldq))]
1392pub fn _mm512_mul_epi32(a: __m512i, b: __m512i) -> __m512i {
1393 unsafe {
1394 let a: i64x8 = simd_cast::<_, i64x8>(simd_cast::<_, i32x8>(a.as_i64x8()));
1395 let b: i64x8 = simd_cast::<_, i64x8>(simd_cast::<_, i32x8>(b.as_i64x8()));
        transmute(simd_mul(a, b))
1397 }
1398}
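
// Only the low 32 bits of every 64-bit lane participate: the inner cast to
// `i32x8` truncates each lane to its low half and the outer cast sign-extends it
// back to 64 bits before the multiply. A one-lane scalar sketch of the same
// computation, illustrative only and not part of this module's API:
#[cfg(test)]
#[allow(dead_code)]
fn mul_epi32_lane_model(a: u64, b: u64) -> i64 {
    // Truncate to the low 32 bits, reinterpret as signed, widen to 64 bits.
    let lo_a = a as u32 as i32 as i64;
    let lo_b = b as u32 as i32 as i64;
    // The product of two sign-extended 32-bit values always fits in an i64.
    lo_a * lo_b
}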
1399
1400/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1401///
1402/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_epi32&expand=3905)
1403#[inline]
1404#[target_feature(enable = "avx512f")]
1405#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1406#[cfg_attr(test, assert_instr(vpmuldq))]
1407pub fn _mm512_mask_mul_epi32(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1408 unsafe {
1409 let mul: i64x8 = _mm512_mul_epi32(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, mul, src.as_i64x8()))
1411 }
1412}
1413
1414/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1415///
1416/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_epi32&expand=3906)
1417#[inline]
1418#[target_feature(enable = "avx512f")]
1419#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1420#[cfg_attr(test, assert_instr(vpmuldq))]
1421pub fn _mm512_maskz_mul_epi32(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1422 unsafe {
1423 let mul: i64x8 = _mm512_mul_epi32(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, mul, i64x8::ZERO))
1425 }
1426}
1427
1428/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1429///
1430/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_epi32&expand=3902)
1431#[inline]
1432#[target_feature(enable = "avx512f,avx512vl")]
1433#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1434#[cfg_attr(test, assert_instr(vpmuldq))]
1435pub fn _mm256_mask_mul_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1436 unsafe {
1437 let mul: i64x4 = _mm256_mul_epi32(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, mul, src.as_i64x4()))
1439 }
1440}
1441
1442/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1443///
1444/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_epi32&expand=3903)
1445#[inline]
1446#[target_feature(enable = "avx512f,avx512vl")]
1447#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1448#[cfg_attr(test, assert_instr(vpmuldq))]
1449pub fn _mm256_maskz_mul_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1450 unsafe {
1451 let mul: i64x4 = _mm256_mul_epi32(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, mul, i64x4::ZERO))
1453 }
1454}
1455
1456/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1457///
1458/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_epi32&expand=3899)
1459#[inline]
1460#[target_feature(enable = "avx512f,avx512vl")]
1461#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1462#[cfg_attr(test, assert_instr(vpmuldq))]
1463pub fn _mm_mask_mul_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1464 unsafe {
1465 let mul: i64x2 = _mm_mul_epi32(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, mul, src.as_i64x2()))
1467 }
1468}
1469
1470/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1471///
1472/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_epi32&expand=3900)
1473#[inline]
1474#[target_feature(enable = "avx512f,avx512vl")]
1475#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1476#[cfg_attr(test, assert_instr(vpmuldq))]
1477pub fn _mm_maskz_mul_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1478 unsafe {
1479 let mul: i64x2 = _mm_mul_epi32(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, mul, i64x2::ZERO))
1481 }
1482}
1483
1484/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst.
1485///
1486/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mullo_epi32&expand=4005)
1487#[inline]
1488#[target_feature(enable = "avx512f")]
1489#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1490#[cfg_attr(test, assert_instr(vpmulld))]
1491pub fn _mm512_mullo_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_mul(a.as_i32x16(), b.as_i32x16())) }
1493}
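
// Keeping only the low 32 bits of the 64-bit intermediate product is exactly
// wrapping 32-bit multiplication. A one-lane scalar sketch, illustrative only:
#[cfg(test)]
#[allow(dead_code)]
fn mullo_epi32_lane_model(a: i32, b: i32) -> i32 {
    // Low 32 bits of the widened product; identical to a.wrapping_mul(b).
    (i64::from(a) * i64::from(b)) as i32
}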
1494
1495/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1496///
1497/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mullo_epi32&expand=4003)
1498#[inline]
1499#[target_feature(enable = "avx512f")]
1500#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1501#[cfg_attr(test, assert_instr(vpmulld))]
1502pub fn _mm512_mask_mullo_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
1503 unsafe {
1504 let mul: i32x16 = _mm512_mullo_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, mul, src.as_i32x16()))
1506 }
1507}
1508
1509/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1510///
1511/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mullo_epi32&expand=4004)
1512#[inline]
1513#[target_feature(enable = "avx512f")]
1514#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1515#[cfg_attr(test, assert_instr(vpmulld))]
1516pub fn _mm512_maskz_mullo_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
1517 unsafe {
1518 let mul: i32x16 = _mm512_mullo_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, mul, i32x16::ZERO))
1520 }
1521}
1522
1523/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1524///
1525/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mullo_epi32&expand=4000)
1526#[inline]
1527#[target_feature(enable = "avx512f,avx512vl")]
1528#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1529#[cfg_attr(test, assert_instr(vpmulld))]
1530pub fn _mm256_mask_mullo_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1531 unsafe {
1532 let mul: i32x8 = _mm256_mullo_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, mul, src.as_i32x8()))
1534 }
1535}
1536
1537/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1538///
1539/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mullo_epi32&expand=4001)
1540#[inline]
1541#[target_feature(enable = "avx512f,avx512vl")]
1542#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1543#[cfg_attr(test, assert_instr(vpmulld))]
1544pub fn _mm256_maskz_mullo_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1545 unsafe {
1546 let mul: i32x8 = _mm256_mullo_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, mul, i32x8::ZERO))
1548 }
1549}
1550
1551/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1552///
1553/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mullo_epi32&expand=3997)
1554#[inline]
1555#[target_feature(enable = "avx512f,avx512vl")]
1556#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1557#[cfg_attr(test, assert_instr(vpmulld))]
1558pub fn _mm_mask_mullo_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1559 unsafe {
1560 let mul: i32x4 = _mm_mullo_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, mul, src.as_i32x4()))
1562 }
1563}
1564
1565/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1566///
1567/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mullo_epi32&expand=3998)
1568#[inline]
1569#[target_feature(enable = "avx512f,avx512vl")]
1570#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1571#[cfg_attr(test, assert_instr(vpmulld))]
1572pub fn _mm_maskz_mullo_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1573 unsafe {
1574 let mul: i32x4 = _mm_mullo_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, mul, i32x4::ZERO))
1576 }
1577}
1578
1579/// Multiplies elements in packed 64-bit integer vectors a and b together, storing the lower 64 bits of the result in dst.
1580///
1581/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mullox_epi64&expand=4017)
1582///
1583/// This intrinsic generates a sequence of instructions, which may perform worse than a native instruction. Consider the performance impact of this intrinsic.
1584#[inline]
1585#[target_feature(enable = "avx512f")]
1586#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1587pub fn _mm512_mullox_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_mul(a.as_i64x8(), b.as_i64x8())) }
1589}
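
// There is no `assert_instr` on `_mm512_mullox_epi64` because, as the note above
// says, it is lowered to a sequence of instructions rather than a single one.
// Semantically it keeps the low 64 bits of each 128-bit product, i.e. wrapping
// 64-bit multiplication. A one-lane scalar sketch, illustrative only:
#[cfg(test)]
#[allow(dead_code)]
fn mullox_epi64_lane_model(a: i64, b: i64) -> i64 {
    a.wrapping_mul(b)
}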
1590
1591/// Multiplies elements in packed 64-bit integer vectors a and b together, storing the lower 64 bits of the result in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1592///
1593/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mullox_epi64&expand=4016)
1594///
1595/// This intrinsic generates a sequence of instructions, which may perform worse than a native instruction. Consider the performance impact of this intrinsic.
1596#[inline]
1597#[target_feature(enable = "avx512f")]
1598#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1599pub fn _mm512_mask_mullox_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1600 unsafe {
1601 let mul: i64x8 = _mm512_mullox_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, mul, src.as_i64x8()))
1603 }
1604}
1605
1606/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst.
1607///
1608/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_epu32&expand=3916)
1609#[inline]
1610#[target_feature(enable = "avx512f")]
1611#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1612#[cfg_attr(test, assert_instr(vpmuludq))]
1613pub fn _mm512_mul_epu32(a: __m512i, b: __m512i) -> __m512i {
1614 unsafe {
1615 let a: u64x8 = a.as_u64x8();
1616 let b: u64x8 = b.as_u64x8();
1617 let mask: u64x8 = u64x8::splat(u32::MAX.into());
        transmute(simd_mul(simd_and(a, mask), simd_and(b, mask)))
1619 }
1620}
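
// The `u32::MAX` splat clears the high half of every 64-bit lane before the
// multiply, so each product of two values below 2^32 fits in the 64-bit result
// without wrapping. A one-lane scalar sketch, illustrative only:
#[cfg(test)]
#[allow(dead_code)]
fn mul_epu32_lane_model(a: u64, b: u64) -> u64 {
    // Keep only the low 32 bits of each operand, then multiply as u64.
    (a & u64::from(u32::MAX)) * (b & u64::from(u32::MAX))
}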
1621
1622/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1623///
1624/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_epu32&expand=3914)
1625#[inline]
1626#[target_feature(enable = "avx512f")]
1627#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1628#[cfg_attr(test, assert_instr(vpmuludq))]
1629pub fn _mm512_mask_mul_epu32(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1630 unsafe {
1631 let mul: u64x8 = _mm512_mul_epu32(a, b).as_u64x8();
        transmute(simd_select_bitmask(k, mul, src.as_u64x8()))
1633 }
1634}
1635
1636/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1637///
1638/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_epu32&expand=3915)
1639#[inline]
1640#[target_feature(enable = "avx512f")]
1641#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1642#[cfg_attr(test, assert_instr(vpmuludq))]
1643pub fn _mm512_maskz_mul_epu32(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1644 unsafe {
1645 let mul: u64x8 = _mm512_mul_epu32(a, b).as_u64x8();
        transmute(simd_select_bitmask(k, mul, u64x8::ZERO))
1647 }
1648}
1649
1650/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1651///
1652/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_epu32&expand=3911)
1653#[inline]
1654#[target_feature(enable = "avx512f,avx512vl")]
1655#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1656#[cfg_attr(test, assert_instr(vpmuludq))]
1657pub fn _mm256_mask_mul_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1658 unsafe {
1659 let mul: u64x4 = _mm256_mul_epu32(a, b).as_u64x4();
        transmute(simd_select_bitmask(k, mul, src.as_u64x4()))
1661 }
1662}
1663
1664/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1665///
1666/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_epu32&expand=3912)
1667#[inline]
1668#[target_feature(enable = "avx512f,avx512vl")]
1669#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1670#[cfg_attr(test, assert_instr(vpmuludq))]
1671pub fn _mm256_maskz_mul_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1672 unsafe {
1673 let mul: u64x4 = _mm256_mul_epu32(a, b).as_u64x4();
        transmute(simd_select_bitmask(k, mul, u64x4::ZERO))
1675 }
1676}
1677
1678/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1679///
1680/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_epu32&expand=3908)
1681#[inline]
1682#[target_feature(enable = "avx512f,avx512vl")]
1683#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1684#[cfg_attr(test, assert_instr(vpmuludq))]
1685pub fn _mm_mask_mul_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1686 unsafe {
1687 let mul: u64x2 = _mm_mul_epu32(a, b).as_u64x2();
        transmute(simd_select_bitmask(k, mul, src.as_u64x2()))
1689 }
1690}
1691
1692/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1693///
1694/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_epu32&expand=3909)
1695#[inline]
1696#[target_feature(enable = "avx512f,avx512vl")]
1697#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1698#[cfg_attr(test, assert_instr(vpmuludq))]
1699pub fn _mm_maskz_mul_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1700 unsafe {
1701 let mul: u64x2 = _mm_mul_epu32(a, b).as_u64x2();
        transmute(simd_select_bitmask(k, mul, u64x2::ZERO))
1703 }
1704}
1705
1706/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
1707///
1708/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_ps&expand=3934)
1709#[inline]
1710#[target_feature(enable = "avx512f")]
1711#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1712#[cfg_attr(test, assert_instr(vmulps))]
1713pub fn _mm512_mul_ps(a: __m512, b: __m512) -> __m512 {
    unsafe { transmute(simd_mul(a.as_f32x16(), b.as_f32x16())) }
1715}
1716
1717/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1718///
1719/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_ps&expand=3932)
1720#[inline]
1721#[target_feature(enable = "avx512f")]
1722#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1723#[cfg_attr(test, assert_instr(vmulps))]
1724pub fn _mm512_mask_mul_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
1725 unsafe {
1726 let mul: f32x16 = _mm512_mul_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, mul, src.as_f32x16()))
1728 }
1729}
1730
1731/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1732///
1733/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_ps&expand=3933)
1734#[inline]
1735#[target_feature(enable = "avx512f")]
1736#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1737#[cfg_attr(test, assert_instr(vmulps))]
1738pub fn _mm512_maskz_mul_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
1739 unsafe {
1740 let mul: f32x16 = _mm512_mul_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, mul, f32x16::ZERO))
1742 }
1743}
1744
1745/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1746///
1747/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_ps&expand=3929)
1748#[inline]
1749#[target_feature(enable = "avx512f,avx512vl")]
1750#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1751#[cfg_attr(test, assert_instr(vmulps))]
1752pub fn _mm256_mask_mul_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
1753 unsafe {
1754 let mul: f32x8 = _mm256_mul_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, mul, src.as_f32x8()))
1756 }
1757}
1758
1759/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1760///
1761/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_ps&expand=3930)
1762#[inline]
1763#[target_feature(enable = "avx512f,avx512vl")]
1764#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1765#[cfg_attr(test, assert_instr(vmulps))]
1766pub fn _mm256_maskz_mul_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
1767 unsafe {
1768 let mul: f32x8 = _mm256_mul_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, mul, f32x8::ZERO))
1770 }
1771}
1772
1773/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1774///
1775/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_ps&expand=3926)
1776#[inline]
1777#[target_feature(enable = "avx512f,avx512vl")]
1778#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1779#[cfg_attr(test, assert_instr(vmulps))]
1780pub fn _mm_mask_mul_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
1781 unsafe {
1782 let mul: f32x4 = _mm_mul_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, mul, src.as_f32x4()))
1784 }
1785}
1786
1787/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1788///
1789/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_ps&expand=3927)
1790#[inline]
1791#[target_feature(enable = "avx512f,avx512vl")]
1792#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1793#[cfg_attr(test, assert_instr(vmulps))]
1794pub fn _mm_maskz_mul_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
1795 unsafe {
1796 let mul: f32x4 = _mm_mul_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, mul, f32x4::ZERO))
1798 }
1799}
1800
1801/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
1802///
1803/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_pd&expand=3925)
1804#[inline]
1805#[target_feature(enable = "avx512f")]
1806#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1807#[cfg_attr(test, assert_instr(vmulpd))]
1808pub fn _mm512_mul_pd(a: __m512d, b: __m512d) -> __m512d {
    unsafe { transmute(simd_mul(a.as_f64x8(), b.as_f64x8())) }
1810}
1811
1812/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1813///
1814/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_pd&expand=3923)
1815#[inline]
1816#[target_feature(enable = "avx512f")]
1817#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1818#[cfg_attr(test, assert_instr(vmulpd))]
1819pub fn _mm512_mask_mul_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
1820 unsafe {
1821 let mul: f64x8 = _mm512_mul_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, mul, src.as_f64x8()))
1823 }
1824}
1825
1826/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1827///
1828/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_pd&expand=3924)
1829#[inline]
1830#[target_feature(enable = "avx512f")]
1831#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1832#[cfg_attr(test, assert_instr(vmulpd))]
1833pub fn _mm512_maskz_mul_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
1834 unsafe {
1835 let mul: f64x8 = _mm512_mul_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, mul, f64x8::ZERO))
1837 }
1838}
1839
1840/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1841///
1842/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_pd&expand=3920)
1843#[inline]
1844#[target_feature(enable = "avx512f,avx512vl")]
1845#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1846#[cfg_attr(test, assert_instr(vmulpd))]
1847pub fn _mm256_mask_mul_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
1848 unsafe {
1849 let mul: f64x4 = _mm256_mul_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, mul, src.as_f64x4()))
1851 }
1852}
1853
1854/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1855///
1856/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_pd&expand=3921)
1857#[inline]
1858#[target_feature(enable = "avx512f,avx512vl")]
1859#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1860#[cfg_attr(test, assert_instr(vmulpd))]
1861pub fn _mm256_maskz_mul_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
1862 unsafe {
1863 let mul: f64x4 = _mm256_mul_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, mul, f64x4::ZERO))
1865 }
1866}
1867
1868/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1869///
1870/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_pd&expand=3917)
1871#[inline]
1872#[target_feature(enable = "avx512f,avx512vl")]
1873#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1874#[cfg_attr(test, assert_instr(vmulpd))]
1875pub fn _mm_mask_mul_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
1876 unsafe {
1877 let mul: f64x2 = _mm_mul_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, mul, src.as_f64x2()))
1879 }
1880}
1881
1882/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1883///
1884/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_pd&expand=3918)
1885#[inline]
1886#[target_feature(enable = "avx512f,avx512vl")]
1887#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1888#[cfg_attr(test, assert_instr(vmulpd))]
1889pub fn _mm_maskz_mul_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
1890 unsafe {
1891 let mul: f64x2 = _mm_mul_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, mul, f64x2::ZERO))
1893 }
1894}
1895
1896/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst.
1897///
1898/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_ps&expand=2162)
1899#[inline]
1900#[target_feature(enable = "avx512f")]
1901#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1902#[cfg_attr(test, assert_instr(vdivps))]
1903pub fn _mm512_div_ps(a: __m512, b: __m512) -> __m512 {
    unsafe { transmute(simd_div(a.as_f32x16(), b.as_f32x16())) }
1905}
1906
1907/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1908///
1909/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_ps&expand=2163)
1910#[inline]
1911#[target_feature(enable = "avx512f")]
1912#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1913#[cfg_attr(test, assert_instr(vdivps))]
1914pub fn _mm512_mask_div_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
1915 unsafe {
1916 let div: f32x16 = _mm512_div_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, div, src.as_f32x16()))
1918 }
1919}
1920
1921/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1922///
1923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_ps&expand=2164)
1924#[inline]
1925#[target_feature(enable = "avx512f")]
1926#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1927#[cfg_attr(test, assert_instr(vdivps))]
1928pub fn _mm512_maskz_div_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
1929 unsafe {
1930 let div: f32x16 = _mm512_div_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, div, f32x16::ZERO))
1932 }
1933}
1934
1935/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1936///
1937/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_div_ps&expand=2160)
1938#[inline]
1939#[target_feature(enable = "avx512f,avx512vl")]
1940#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1941#[cfg_attr(test, assert_instr(vdivps))]
1942pub fn _mm256_mask_div_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
1943 unsafe {
1944 let div: f32x8 = _mm256_div_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, div, src.as_f32x8()))
1946 }
1947}
1948
1949/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1950///
1951/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_div_ps&expand=2161)
1952#[inline]
1953#[target_feature(enable = "avx512f,avx512vl")]
1954#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1955#[cfg_attr(test, assert_instr(vdivps))]
1956pub fn _mm256_maskz_div_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
1957 unsafe {
1958 let div: f32x8 = _mm256_div_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, div, f32x8::ZERO))
1960 }
1961}
1962
1963/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1964///
1965/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_div_ps&expand=2157)
1966#[inline]
1967#[target_feature(enable = "avx512f,avx512vl")]
1968#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1969#[cfg_attr(test, assert_instr(vdivps))]
1970pub fn _mm_mask_div_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
1971 unsafe {
1972 let div: f32x4 = _mm_div_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, div, src.as_f32x4()))
1974 }
1975}
1976
1977/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1978///
1979/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_div_ps&expand=2158)
1980#[inline]
1981#[target_feature(enable = "avx512f,avx512vl")]
1982#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1983#[cfg_attr(test, assert_instr(vdivps))]
1984pub fn _mm_maskz_div_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
1985 unsafe {
1986 let div: f32x4 = _mm_div_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, div, f32x4::ZERO))
1988 }
1989}
1990
1991/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst.
1992///
1993/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_pd&expand=2153)
1994#[inline]
1995#[target_feature(enable = "avx512f")]
1996#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1997#[cfg_attr(test, assert_instr(vdivpd))]
1998pub fn _mm512_div_pd(a: __m512d, b: __m512d) -> __m512d {
    unsafe { transmute(simd_div(a.as_f64x8(), b.as_f64x8())) }
2000}
2001
2002/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2003///
2004/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_pd&expand=2154)
2005#[inline]
2006#[target_feature(enable = "avx512f")]
2007#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2008#[cfg_attr(test, assert_instr(vdivpd))]
2009pub fn _mm512_mask_div_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
2010 unsafe {
2011 let div: f64x8 = _mm512_div_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, div, src.as_f64x8()))
2013 }
2014}
2015
2016/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2017///
2018/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_pd&expand=2155)
2019#[inline]
2020#[target_feature(enable = "avx512f")]
2021#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2022#[cfg_attr(test, assert_instr(vdivpd))]
2023pub fn _mm512_maskz_div_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
2024 unsafe {
2025 let div: f64x8 = _mm512_div_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, div, f64x8::ZERO))
2027 }
2028}
2029
2030/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2031///
2032/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_div_pd&expand=2151)
2033#[inline]
2034#[target_feature(enable = "avx512f,avx512vl")]
2035#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2036#[cfg_attr(test, assert_instr(vdivpd))]
2037pub fn _mm256_mask_div_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
2038 unsafe {
2039 let div: f64x4 = _mm256_div_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, div, src.as_f64x4()))
2041 }
2042}
2043
2044/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2045///
2046/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_div_pd&expand=2152)
2047#[inline]
2048#[target_feature(enable = "avx512f,avx512vl")]
2049#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2050#[cfg_attr(test, assert_instr(vdivpd))]
2051pub fn _mm256_maskz_div_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
2052 unsafe {
2053 let div: f64x4 = _mm256_div_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, div, f64x4::ZERO))
2055 }
2056}
2057
2058/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2059///
2060/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_div_pd&expand=2148)
2061#[inline]
2062#[target_feature(enable = "avx512f,avx512vl")]
2063#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2064#[cfg_attr(test, assert_instr(vdivpd))]
2065pub fn _mm_mask_div_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
2066 unsafe {
2067 let div: f64x2 = _mm_div_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, div, src.as_f64x2()))
2069 }
2070}
2071
2072/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2073///
2074/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_div_pd&expand=2149)
2075#[inline]
2076#[target_feature(enable = "avx512f,avx512vl")]
2077#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2078#[cfg_attr(test, assert_instr(vdivpd))]
2079pub fn _mm_maskz_div_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
2080 unsafe {
2081 let div: f64x2 = _mm_div_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, div, f64x2::ZERO))
2083 }
2084}
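
// The masked division variants select per lane exactly like the masked subtracts
// and multiplies above: mask bit i picks the quotient for lane i, and an
// unselected lane keeps the matching lane of `src` (writemask) or becomes 0.0
// (zeromask). A minimal two-lane scalar model of the writemask form, illustrative
// only and not part of this module's API:
#[cfg(test)]
#[allow(dead_code)]
fn mask_div_pd_model(src: [f64; 2], k: u8, a: [f64; 2], b: [f64; 2]) -> [f64; 2] {
    let mut dst = src;
    for i in 0..2 {
        if (k >> i) & 1 == 1 {
            dst[i] = a[i] / b[i];
        }
    }
    dst
}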
2085
2086/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst.
2087///
2088/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epi32&expand=3582)
2089#[inline]
2090#[target_feature(enable = "avx512f")]
2091#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2092#[cfg_attr(test, assert_instr(vpmaxsd))]
2093pub fn _mm512_max_epi32(a: __m512i, b: __m512i) -> __m512i {
2094 unsafe {
2095 let a: i32x16 = a.as_i32x16();
2096 let b: i32x16 = b.as_i32x16();
        transmute(simd_select::<i32x16, _>(simd_gt(a, b), a, b))
2098 }
2099}
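
// The signed maximum is expressed as a lane-wise greater-than compare followed by
// a select: a lane of `dst` takes `a` when `a > b` and `b` otherwise, matching
// what VPMAXSD computes. A one-lane scalar sketch, illustrative only:
#[cfg(test)]
#[allow(dead_code)]
fn max_epi32_lane_model(a: i32, b: i32) -> i32 {
    if a > b { a } else { b }
}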
2100
2101/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2102///
2103/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epi32&expand=3580)
2104#[inline]
2105#[target_feature(enable = "avx512f")]
2106#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2107#[cfg_attr(test, assert_instr(vpmaxsd))]
2108pub fn _mm512_mask_max_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
2109 unsafe {
2110 let max: i32x16 = _mm512_max_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, max, src.as_i32x16()))
2112 }
2113}
2114
2115/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2116///
2117/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epi32&expand=3581)
2118#[inline]
2119#[target_feature(enable = "avx512f")]
2120#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2121#[cfg_attr(test, assert_instr(vpmaxsd))]
2122pub fn _mm512_maskz_max_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
2123 unsafe {
2124 let max: i32x16 = _mm512_max_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, max, i32x16::ZERO))
2126 }
2127}
2128
2129/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2130///
2131/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epi32&expand=3577)
2132#[inline]
2133#[target_feature(enable = "avx512f,avx512vl")]
2134#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2135#[cfg_attr(test, assert_instr(vpmaxsd))]
2136pub fn _mm256_mask_max_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2137 unsafe {
2138 let max: i32x8 = _mm256_max_epi32(a, b).as_i32x8();
2139 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_i32x8()))
2140 }
2141}
2142
2143/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2144///
2145/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epi32&expand=3578)
2146#[inline]
2147#[target_feature(enable = "avx512f,avx512vl")]
2148#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2149#[cfg_attr(test, assert_instr(vpmaxsd))]
2150pub fn _mm256_maskz_max_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2151 unsafe {
2152 let max: i32x8 = _mm256_max_epi32(a, b).as_i32x8();
2153 transmute(src:simd_select_bitmask(m:k, yes:max, no:i32x8::ZERO))
2154 }
2155}
2156
2157/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2158///
2159/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epi32&expand=3574)
2160#[inline]
2161#[target_feature(enable = "avx512f,avx512vl")]
2162#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2163#[cfg_attr(test, assert_instr(vpmaxsd))]
2164pub fn _mm_mask_max_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2165 unsafe {
2166 let max: i32x4 = _mm_max_epi32(a, b).as_i32x4();
2167 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_i32x4()))
2168 }
2169}
2170
2171/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2172///
2173/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epi32&expand=3575)
2174#[inline]
2175#[target_feature(enable = "avx512f,avx512vl")]
2176#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2177#[cfg_attr(test, assert_instr(vpmaxsd))]
2178pub fn _mm_maskz_max_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2179 unsafe {
2180 let max: i32x4 = _mm_max_epi32(a, b).as_i32x4();
2181 transmute(src:simd_select_bitmask(m:k, yes:max, no:i32x4::ZERO))
2182 }
2183}
2184
2185/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst.
2186///
2187/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epi64&expand=3591)
2188#[inline]
2189#[target_feature(enable = "avx512f")]
2190#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2191#[cfg_attr(test, assert_instr(vpmaxsq))]
2192pub fn _mm512_max_epi64(a: __m512i, b: __m512i) -> __m512i {
2193 unsafe {
2194 let a: i64x8 = a.as_i64x8();
2195 let b: i64x8 = b.as_i64x8();
2196 transmute(src:simd_select::<i64x8, _>(mask:simd_gt(a, b), if_true:a, if_false:b))
2197 }
2198}
2199
2200/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2201///
2202/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epi64&expand=3589)
2203#[inline]
2204#[target_feature(enable = "avx512f")]
2205#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2206#[cfg_attr(test, assert_instr(vpmaxsq))]
2207pub fn _mm512_mask_max_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
2208 unsafe {
2209 let max: i64x8 = _mm512_max_epi64(a, b).as_i64x8();
2210 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_i64x8()))
2211 }
2212}
2213
2214/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2215///
2216/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epi64&expand=3590)
2217#[inline]
2218#[target_feature(enable = "avx512f")]
2219#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2220#[cfg_attr(test, assert_instr(vpmaxsq))]
2221pub fn _mm512_maskz_max_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
2222 unsafe {
2223 let max: i64x8 = _mm512_max_epi64(a, b).as_i64x8();
2224 transmute(src:simd_select_bitmask(m:k, yes:max, no:i64x8::ZERO))
2225 }
2226}
2227
2228/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst.
2229///
2230/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epi64&expand=3588)
2231#[inline]
2232#[target_feature(enable = "avx512f,avx512vl")]
2233#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2234#[cfg_attr(test, assert_instr(vpmaxsq))]
2235pub fn _mm256_max_epi64(a: __m256i, b: __m256i) -> __m256i {
2236 unsafe {
2237 let a: i64x4 = a.as_i64x4();
2238 let b: i64x4 = b.as_i64x4();
2239 transmute(src:simd_select::<i64x4, _>(mask:simd_gt(a, b), if_true:a, if_false:b))
2240 }
2241}
2242
2243/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2244///
2245/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epi64&expand=3586)
2246#[inline]
2247#[target_feature(enable = "avx512f,avx512vl")]
2248#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2249#[cfg_attr(test, assert_instr(vpmaxsq))]
2250pub fn _mm256_mask_max_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2251 unsafe {
2252 let max: i64x4 = _mm256_max_epi64(a, b).as_i64x4();
2253 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_i64x4()))
2254 }
2255}
2256
2257/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2258///
2259/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epi64&expand=3587)
2260#[inline]
2261#[target_feature(enable = "avx512f,avx512vl")]
2262#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2263#[cfg_attr(test, assert_instr(vpmaxsq))]
2264pub fn _mm256_maskz_max_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2265 unsafe {
2266 let max: i64x4 = _mm256_max_epi64(a, b).as_i64x4();
2267 transmute(src:simd_select_bitmask(m:k, yes:max, no:i64x4::ZERO))
2268 }
2269}
2270
2271/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst.
2272///
2273/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epi64&expand=3585)
2274#[inline]
2275#[target_feature(enable = "avx512f,avx512vl")]
2276#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2277#[cfg_attr(test, assert_instr(vpmaxsq))]
2278pub fn _mm_max_epi64(a: __m128i, b: __m128i) -> __m128i {
2279 unsafe {
2280 let a: i64x2 = a.as_i64x2();
2281 let b: i64x2 = b.as_i64x2();
2282 transmute(src:simd_select::<i64x2, _>(mask:simd_gt(a, b), if_true:a, if_false:b))
2283 }
2284}
2285
2286/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2287///
2288/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epi64&expand=3583)
2289#[inline]
2290#[target_feature(enable = "avx512f,avx512vl")]
2291#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2292#[cfg_attr(test, assert_instr(vpmaxsq))]
2293pub fn _mm_mask_max_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2294 unsafe {
2295 let max: i64x2 = _mm_max_epi64(a, b).as_i64x2();
2296 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_i64x2()))
2297 }
2298}
2299
2300/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2301///
2302/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epi64&expand=3584)
2303#[inline]
2304#[target_feature(enable = "avx512f,avx512vl")]
2305#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2306#[cfg_attr(test, assert_instr(vpmaxsq))]
2307pub fn _mm_maskz_max_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2308 unsafe {
2309 let max: i64x2 = _mm_max_epi64(a, b).as_i64x2();
2310 transmute(src:simd_select_bitmask(m:k, yes:max, no:i64x2::ZERO))
2311 }
2312}
2313
2314/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst.
2315///
2316/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_ps&expand=3655)
2317#[inline]
2318#[target_feature(enable = "avx512f")]
2319#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2320#[cfg_attr(test, assert_instr(vmaxps))]
2321pub fn _mm512_max_ps(a: __m512, b: __m512) -> __m512 {
2322 unsafe {
2323 transmute(src:vmaxps(
2324 a.as_f32x16(),
2325 b.as_f32x16(),
2326 _MM_FROUND_CUR_DIRECTION,
2327 ))
2328 }
2329}
2330
2331/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2332///
2333/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_ps&expand=3653)
2334#[inline]
2335#[target_feature(enable = "avx512f")]
2336#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2337#[cfg_attr(test, assert_instr(vmaxps))]
2338pub fn _mm512_mask_max_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
2339 unsafe {
2340 let max: f32x16 = _mm512_max_ps(a, b).as_f32x16();
2341 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_f32x16()))
2342 }
2343}
2344
2345/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2346///
2347/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_ps&expand=3654)
2348#[inline]
2349#[target_feature(enable = "avx512f")]
2350#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2351#[cfg_attr(test, assert_instr(vmaxps))]
2352pub fn _mm512_maskz_max_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
2353 unsafe {
2354 let max: f32x16 = _mm512_max_ps(a, b).as_f32x16();
2355 transmute(src:simd_select_bitmask(m:k, yes:max, no:f32x16::ZERO))
2356 }
2357}
2358
2359/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2360///
2361/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_ps&expand=3650)
2362#[inline]
2363#[target_feature(enable = "avx512f,avx512vl")]
2364#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2365#[cfg_attr(test, assert_instr(vmaxps))]
2366pub fn _mm256_mask_max_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
2367 unsafe {
2368 let max: f32x8 = _mm256_max_ps(a, b).as_f32x8();
2369 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_f32x8()))
2370 }
2371}
2372
2373/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2374///
2375/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_ps&expand=3651)
2376#[inline]
2377#[target_feature(enable = "avx512f,avx512vl")]
2378#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2379#[cfg_attr(test, assert_instr(vmaxps))]
2380pub fn _mm256_maskz_max_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
2381 unsafe {
2382 let max: f32x8 = _mm256_max_ps(a, b).as_f32x8();
2383 transmute(src:simd_select_bitmask(m:k, yes:max, no:f32x8::ZERO))
2384 }
2385}
2386
2387/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2388///
2389/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_ps&expand=3647)
2390#[inline]
2391#[target_feature(enable = "avx512f,avx512vl")]
2392#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2393#[cfg_attr(test, assert_instr(vmaxps))]
2394pub fn _mm_mask_max_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
2395 unsafe {
2396 let max: f32x4 = _mm_max_ps(a, b).as_f32x4();
2397 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_f32x4()))
2398 }
2399}
2400
2401/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2402///
2403/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_ps&expand=3648)
2404#[inline]
2405#[target_feature(enable = "avx512f,avx512vl")]
2406#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2407#[cfg_attr(test, assert_instr(vmaxps))]
2408pub fn _mm_maskz_max_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
2409 unsafe {
2410 let max: f32x4 = _mm_max_ps(a, b).as_f32x4();
2411 transmute(src:simd_select_bitmask(m:k, yes:max, no:f32x4::ZERO))
2412 }
2413}
2414
2415/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst.
2416///
2417/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_pd&expand=3645)
2418#[inline]
2419#[target_feature(enable = "avx512f")]
2420#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2421#[cfg_attr(test, assert_instr(vmaxpd))]
2422pub fn _mm512_max_pd(a: __m512d, b: __m512d) -> __m512d {
2423 unsafe { transmute(src:vmaxpd(a.as_f64x8(), b.as_f64x8(), _MM_FROUND_CUR_DIRECTION)) }
2424}
2425
2426/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2427///
2428/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_pd&expand=3643)
2429#[inline]
2430#[target_feature(enable = "avx512f")]
2431#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2432#[cfg_attr(test, assert_instr(vmaxpd))]
2433pub fn _mm512_mask_max_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
2434 unsafe {
2435 let max: f64x8 = _mm512_max_pd(a, b).as_f64x8();
2436 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_f64x8()))
2437 }
2438}
2439
2440/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2441///
2442/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_pd&expand=3644)
2443#[inline]
2444#[target_feature(enable = "avx512f")]
2445#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2446#[cfg_attr(test, assert_instr(vmaxpd))]
2447pub fn _mm512_maskz_max_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
2448 unsafe {
2449 let max: f64x8 = _mm512_max_pd(a, b).as_f64x8();
2450 transmute(src:simd_select_bitmask(m:k, yes:max, no:f64x8::ZERO))
2451 }
2452}
2453
2454/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2455///
2456/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_pd&expand=3640)
2457#[inline]
2458#[target_feature(enable = "avx512f,avx512vl")]
2459#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2460#[cfg_attr(test, assert_instr(vmaxpd))]
2461pub fn _mm256_mask_max_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
2462 unsafe {
2463 let max: f64x4 = _mm256_max_pd(a, b).as_f64x4();
2464 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_f64x4()))
2465 }
2466}
2467
2468/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2469///
2470/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_pd&expand=3641)
2471#[inline]
2472#[target_feature(enable = "avx512f,avx512vl")]
2473#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2474#[cfg_attr(test, assert_instr(vmaxpd))]
2475pub fn _mm256_maskz_max_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
2476 unsafe {
2477 let max: f64x4 = _mm256_max_pd(a, b).as_f64x4();
2478 transmute(src:simd_select_bitmask(m:k, yes:max, no:f64x4::ZERO))
2479 }
2480}
2481
2482/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2483///
2484/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_pd&expand=3637)
2485#[inline]
2486#[target_feature(enable = "avx512f,avx512vl")]
2487#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2488#[cfg_attr(test, assert_instr(vmaxpd))]
2489pub fn _mm_mask_max_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
2490 unsafe {
2491 let max: f64x2 = _mm_max_pd(a, b).as_f64x2();
2492 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_f64x2()))
2493 }
2494}
2495
2496/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2497///
2498/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_pd&expand=3638)
2499#[inline]
2500#[target_feature(enable = "avx512f,avx512vl")]
2501#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2502#[cfg_attr(test, assert_instr(vmaxpd))]
2503pub fn _mm_maskz_max_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
2504 unsafe {
2505 let max: f64x2 = _mm_max_pd(a, b).as_f64x2();
2506 transmute(src:simd_select_bitmask(m:k, yes:max, no:f64x2::ZERO))
2507 }
2508}
2509
2510/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst.
2511///
2512/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epu32&expand=3618)
2513#[inline]
2514#[target_feature(enable = "avx512f")]
2515#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2516#[cfg_attr(test, assert_instr(vpmaxud))]
2517pub fn _mm512_max_epu32(a: __m512i, b: __m512i) -> __m512i {
2518 unsafe {
2519 let a: u32x16 = a.as_u32x16();
2520 let b: u32x16 = b.as_u32x16();
2521 transmute(src:simd_select::<i32x16, _>(mask:simd_gt(a, b), if_true:a, if_false:b))
2522 }
2523}
2524
2525/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2526///
2527/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epu32&expand=3616)
2528#[inline]
2529#[target_feature(enable = "avx512f")]
2530#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2531#[cfg_attr(test, assert_instr(vpmaxud))]
2532pub fn _mm512_mask_max_epu32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
2533 unsafe {
2534 let max: u32x16 = _mm512_max_epu32(a, b).as_u32x16();
2535 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_u32x16()))
2536 }
2537}
2538
2539/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2540///
2541/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epu32&expand=3617)
2542#[inline]
2543#[target_feature(enable = "avx512f")]
2544#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2545#[cfg_attr(test, assert_instr(vpmaxud))]
2546pub fn _mm512_maskz_max_epu32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
2547 unsafe {
2548 let max: u32x16 = _mm512_max_epu32(a, b).as_u32x16();
2549 transmute(src:simd_select_bitmask(m:k, yes:max, no:u32x16::ZERO))
2550 }
2551}
2552
2553/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2554///
2555/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epu32&expand=3613)
2556#[inline]
2557#[target_feature(enable = "avx512f,avx512vl")]
2558#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2559#[cfg_attr(test, assert_instr(vpmaxud))]
2560pub fn _mm256_mask_max_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2561 unsafe {
2562 let max: u32x8 = _mm256_max_epu32(a, b).as_u32x8();
2563 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_u32x8()))
2564 }
2565}
2566
2567/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2568///
2569/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epu32&expand=3614)
2570#[inline]
2571#[target_feature(enable = "avx512f,avx512vl")]
2572#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2573#[cfg_attr(test, assert_instr(vpmaxud))]
2574pub fn _mm256_maskz_max_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2575 unsafe {
2576 let max: u32x8 = _mm256_max_epu32(a, b).as_u32x8();
2577 transmute(src:simd_select_bitmask(m:k, yes:max, no:u32x8::ZERO))
2578 }
2579}
2580
2581/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2582///
2583/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epu32&expand=3610)
2584#[inline]
2585#[target_feature(enable = "avx512f,avx512vl")]
2586#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2587#[cfg_attr(test, assert_instr(vpmaxud))]
2588pub fn _mm_mask_max_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2589 unsafe {
2590 let max: u32x4 = _mm_max_epu32(a, b).as_u32x4();
2591 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_u32x4()))
2592 }
2593}
2594
2595/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2596///
2597/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epu32&expand=3611)
2598#[inline]
2599#[target_feature(enable = "avx512f,avx512vl")]
2600#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2601#[cfg_attr(test, assert_instr(vpmaxud))]
2602pub fn _mm_maskz_max_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2603 unsafe {
2604 let max: u32x4 = _mm_max_epu32(a, b).as_u32x4();
2605 transmute(src:simd_select_bitmask(m:k, yes:max, no:u32x4::ZERO))
2606 }
2607}
2608
2609/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst.
2610///
2611/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epu64&expand=3627)
2612#[inline]
2613#[target_feature(enable = "avx512f")]
2614#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2615#[cfg_attr(test, assert_instr(vpmaxuq))]
2616pub fn _mm512_max_epu64(a: __m512i, b: __m512i) -> __m512i {
2617 unsafe {
2618 let a: u64x8 = a.as_u64x8();
2619 let b: u64x8 = b.as_u64x8();
2620 transmute(src:simd_select::<i64x8, _>(mask:simd_gt(a, b), if_true:a, if_false:b))
2621 }
2622}
2623
2624/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2625///
2626/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epu64&expand=3625)
2627#[inline]
2628#[target_feature(enable = "avx512f")]
2629#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2630#[cfg_attr(test, assert_instr(vpmaxuq))]
2631pub fn _mm512_mask_max_epu64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
2632 unsafe {
2633 let max: u64x8 = _mm512_max_epu64(a, b).as_u64x8();
2634 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_u64x8()))
2635 }
2636}
2637
2638/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2639///
2640/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epu64&expand=3626)
2641#[inline]
2642#[target_feature(enable = "avx512f")]
2643#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2644#[cfg_attr(test, assert_instr(vpmaxuq))]
2645pub fn _mm512_maskz_max_epu64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
2646 unsafe {
2647 let max: u64x8 = _mm512_max_epu64(a, b).as_u64x8();
2648 transmute(src:simd_select_bitmask(m:k, yes:max, no:u64x8::ZERO))
2649 }
2650}
2651
2652/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst.
2653///
2654/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epu64&expand=3624)
2655#[inline]
2656#[target_feature(enable = "avx512f,avx512vl")]
2657#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2658#[cfg_attr(test, assert_instr(vpmaxuq))]
2659pub fn _mm256_max_epu64(a: __m256i, b: __m256i) -> __m256i {
2660 unsafe {
2661 let a: u64x4 = a.as_u64x4();
2662 let b: u64x4 = b.as_u64x4();
2663 transmute(src:simd_select::<i64x4, _>(mask:simd_gt(a, b), if_true:a, if_false:b))
2664 }
2665}
2666
2667/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2668///
2669/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epu64&expand=3622)
2670#[inline]
2671#[target_feature(enable = "avx512f,avx512vl")]
2672#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2673#[cfg_attr(test, assert_instr(vpmaxuq))]
2674pub fn _mm256_mask_max_epu64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2675 unsafe {
2676 let max: u64x4 = _mm256_max_epu64(a, b).as_u64x4();
2677 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_u64x4()))
2678 }
2679}
2680
2681/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2682///
2683/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epu64&expand=3623)
2684#[inline]
2685#[target_feature(enable = "avx512f,avx512vl")]
2686#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2687#[cfg_attr(test, assert_instr(vpmaxuq))]
2688pub fn _mm256_maskz_max_epu64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2689 unsafe {
2690 let max: u64x4 = _mm256_max_epu64(a, b).as_u64x4();
2691 transmute(src:simd_select_bitmask(m:k, yes:max, no:u64x4::ZERO))
2692 }
2693}
2694
2695/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst.
2696///
2697/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epu64&expand=3621)
2698#[inline]
2699#[target_feature(enable = "avx512f,avx512vl")]
2700#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2701#[cfg_attr(test, assert_instr(vpmaxuq))]
2702pub fn _mm_max_epu64(a: __m128i, b: __m128i) -> __m128i {
2703 unsafe {
2704 let a: u64x2 = a.as_u64x2();
2705 let b: u64x2 = b.as_u64x2();
2706 transmute(src:simd_select::<i64x2, _>(mask:simd_gt(a, b), if_true:a, if_false:b))
2707 }
2708}
2709
2710/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2711///
2712/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epu64&expand=3619)
2713#[inline]
2714#[target_feature(enable = "avx512f,avx512vl")]
2715#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2716#[cfg_attr(test, assert_instr(vpmaxuq))]
2717pub fn _mm_mask_max_epu64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2718 unsafe {
2719 let max: u64x2 = _mm_max_epu64(a, b).as_u64x2();
2720 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_u64x2()))
2721 }
2722}
2723
2724/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2725///
2726/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epu64&expand=3620)
2727#[inline]
2728#[target_feature(enable = "avx512f,avx512vl")]
2729#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2730#[cfg_attr(test, assert_instr(vpmaxuq))]
2731pub fn _mm_maskz_max_epu64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2732 unsafe {
2733 let max: u64x2 = _mm_max_epu64(a, b).as_u64x2();
2734 transmute(src:simd_select_bitmask(m:k, yes:max, no:u64x2::ZERO))
2735 }
2736}
2737
2738/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst.
2739///
2740/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epi32&expand=3696)
2741#[inline]
2742#[target_feature(enable = "avx512f")]
2743#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2744#[cfg_attr(test, assert_instr(vpminsd))]
2745pub fn _mm512_min_epi32(a: __m512i, b: __m512i) -> __m512i {
2746 unsafe {
2747 let a: i32x16 = a.as_i32x16();
2748 let b: i32x16 = b.as_i32x16();
2749 transmute(src:simd_select::<i32x16, _>(mask:simd_lt(a, b), if_true:a, if_false:b))
2750 }
2751}
2752
2753/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2754///
2755/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epi32&expand=3694)
2756#[inline]
2757#[target_feature(enable = "avx512f")]
2758#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2759#[cfg_attr(test, assert_instr(vpminsd))]
2760pub fn _mm512_mask_min_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
2761 unsafe {
2762 let min: i32x16 = _mm512_min_epi32(a, b).as_i32x16();
2763 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_i32x16()))
2764 }
2765}
2766
2767/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2768///
2769/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epi32&expand=3695)
2770#[inline]
2771#[target_feature(enable = "avx512f")]
2772#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2773#[cfg_attr(test, assert_instr(vpminsd))]
2774pub fn _mm512_maskz_min_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
2775 unsafe {
2776 let min: i32x16 = _mm512_min_epi32(a, b).as_i32x16();
2777 transmute(src:simd_select_bitmask(m:k, yes:min, no:i32x16::ZERO))
2778 }
2779}
2780
2781/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2782///
2783/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epi32&expand=3691)
2784#[inline]
2785#[target_feature(enable = "avx512f,avx512vl")]
2786#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2787#[cfg_attr(test, assert_instr(vpminsd))]
2788pub fn _mm256_mask_min_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2789 unsafe {
2790 let min: i32x8 = _mm256_min_epi32(a, b).as_i32x8();
2791 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_i32x8()))
2792 }
2793}
2794
2795/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2796///
2797/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epi32&expand=3692)
2798#[inline]
2799#[target_feature(enable = "avx512f,avx512vl")]
2800#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2801#[cfg_attr(test, assert_instr(vpminsd))]
2802pub fn _mm256_maskz_min_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2803 unsafe {
2804 let min: i32x8 = _mm256_min_epi32(a, b).as_i32x8();
2805 transmute(src:simd_select_bitmask(m:k, yes:min, no:i32x8::ZERO))
2806 }
2807}
2808
2809/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2810///
2811/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epi32&expand=3688)
2812#[inline]
2813#[target_feature(enable = "avx512f,avx512vl")]
2814#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2815#[cfg_attr(test, assert_instr(vpminsd))]
2816pub fn _mm_mask_min_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2817 unsafe {
2818 let min: i32x4 = _mm_min_epi32(a, b).as_i32x4();
2819 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_i32x4()))
2820 }
2821}
2822
2823/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2824///
2825/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epi32&expand=3689)
2826#[inline]
2827#[target_feature(enable = "avx512f,avx512vl")]
2828#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2829#[cfg_attr(test, assert_instr(vpminsd))]
2830pub fn _mm_maskz_min_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2831 unsafe {
2832 let min: i32x4 = _mm_min_epi32(a, b).as_i32x4();
2833 transmute(src:simd_select_bitmask(m:k, yes:min, no:i32x4::ZERO))
2834 }
2835}
2836
2837/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst.
2838///
2839/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epi64&expand=3705)
2840#[inline]
2841#[target_feature(enable = "avx512f")]
2842#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2843#[cfg_attr(test, assert_instr(vpminsq))]
2844pub fn _mm512_min_epi64(a: __m512i, b: __m512i) -> __m512i {
2845 unsafe {
2846 let a: i64x8 = a.as_i64x8();
2847 let b: i64x8 = b.as_i64x8();
2848 transmute(src:simd_select::<i64x8, _>(mask:simd_lt(a, b), if_true:a, if_false:b))
2849 }
2850}
2851
2852/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2853///
2854/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epi64&expand=3703)
2855#[inline]
2856#[target_feature(enable = "avx512f")]
2857#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2858#[cfg_attr(test, assert_instr(vpminsq))]
2859pub fn _mm512_mask_min_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
2860 unsafe {
2861 let min: i64x8 = _mm512_min_epi64(a, b).as_i64x8();
2862 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_i64x8()))
2863 }
2864}
2865
2866/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2867///
2868/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epi64&expand=3704)
2869#[inline]
2870#[target_feature(enable = "avx512f")]
2871#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2872#[cfg_attr(test, assert_instr(vpminsq))]
2873pub fn _mm512_maskz_min_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
2874 unsafe {
2875 let min: i64x8 = _mm512_min_epi64(a, b).as_i64x8();
2876 transmute(src:simd_select_bitmask(m:k, yes:min, no:i64x8::ZERO))
2877 }
2878}
2879
2880/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst.
2881///
2882/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epi64&expand=3702)
2883#[inline]
2884#[target_feature(enable = "avx512f,avx512vl")]
2885#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2886#[cfg_attr(test, assert_instr(vpminsq))]
2887pub fn _mm256_min_epi64(a: __m256i, b: __m256i) -> __m256i {
2888 unsafe {
2889 let a: i64x4 = a.as_i64x4();
2890 let b: i64x4 = b.as_i64x4();
2891 transmute(src:simd_select::<i64x4, _>(mask:simd_lt(a, b), if_true:a, if_false:b))
2892 }
2893}
2894
2895/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2896///
2897/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epi64&expand=3700)
2898#[inline]
2899#[target_feature(enable = "avx512f,avx512vl")]
2900#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2901#[cfg_attr(test, assert_instr(vpminsq))]
2902pub fn _mm256_mask_min_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2903 unsafe {
2904 let min: i64x4 = _mm256_min_epi64(a, b).as_i64x4();
2905 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_i64x4()))
2906 }
2907}
2908
2909/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2910///
2911/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epi64&expand=3701)
2912#[inline]
2913#[target_feature(enable = "avx512f,avx512vl")]
2914#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2915#[cfg_attr(test, assert_instr(vpminsq))]
2916pub fn _mm256_maskz_min_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2917 unsafe {
2918 let min: i64x4 = _mm256_min_epi64(a, b).as_i64x4();
2919 transmute(src:simd_select_bitmask(m:k, yes:min, no:i64x4::ZERO))
2920 }
2921}
2922
2923/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst.
2924///
2925/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epi64)
2926#[inline]
2927#[target_feature(enable = "avx512f,avx512vl")]
2928#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2929#[cfg_attr(test, assert_instr(vpminsq))]
2930pub fn _mm_min_epi64(a: __m128i, b: __m128i) -> __m128i {
2931 unsafe {
2932 let a: i64x2 = a.as_i64x2();
2933 let b: i64x2 = b.as_i64x2();
2934 transmute(src:simd_select::<i64x2, _>(mask:simd_lt(a, b), if_true:a, if_false:b))
2935 }
2936}
2937
2938/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2939///
2940/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epi64)
2941#[inline]
2942#[target_feature(enable = "avx512f,avx512vl")]
2943#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2944#[cfg_attr(test, assert_instr(vpminsq))]
2945pub fn _mm_mask_min_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2946 unsafe {
2947 let min: i64x2 = _mm_min_epi64(a, b).as_i64x2();
2948 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_i64x2()))
2949 }
2950}
2951
2952/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2953///
2954/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epi64)
2955#[inline]
2956#[target_feature(enable = "avx512f,avx512vl")]
2957#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2958#[cfg_attr(test, assert_instr(vpminsq))]
2959pub fn _mm_maskz_min_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2960 unsafe {
2961 let min: i64x2 = _mm_min_epi64(a, b).as_i64x2();
2962 transmute(src:simd_select_bitmask(m:k, yes:min, no:i64x2::ZERO))
2963 }
2964}
2965
2966/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst.
2967///
2968/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_ps&expand=3769)
2969#[inline]
2970#[target_feature(enable = "avx512f")]
2971#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2972#[cfg_attr(test, assert_instr(vminps))]
2973pub fn _mm512_min_ps(a: __m512, b: __m512) -> __m512 {
2974 unsafe {
2975 transmute(src:vminps(
2976 a.as_f32x16(),
2977 b.as_f32x16(),
2978 _MM_FROUND_CUR_DIRECTION,
2979 ))
2980 }
2981}
2982
2983/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2984///
2985/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_ps&expand=3767)
2986#[inline]
2987#[target_feature(enable = "avx512f")]
2988#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2989#[cfg_attr(test, assert_instr(vminps))]
2990pub fn _mm512_mask_min_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
2991 unsafe {
2992 let min: f32x16 = _mm512_min_ps(a, b).as_f32x16();
2993 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_f32x16()))
2994 }
2995}
2996
2997/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2998///
2999/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_ps&expand=3768)
3000#[inline]
3001#[target_feature(enable = "avx512f")]
3002#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3003#[cfg_attr(test, assert_instr(vminps))]
3004pub fn _mm512_maskz_min_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
3005 unsafe {
3006 let min: f32x16 = _mm512_min_ps(a, b).as_f32x16();
3007 transmute(src:simd_select_bitmask(m:k, yes:min, no:f32x16::ZERO))
3008 }
3009}
3010
3011/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3012///
3013/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_ps&expand=3764)
3014#[inline]
3015#[target_feature(enable = "avx512f,avx512vl")]
3016#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3017#[cfg_attr(test, assert_instr(vminps))]
3018pub fn _mm256_mask_min_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
3019 unsafe {
3020 let min: f32x8 = _mm256_min_ps(a, b).as_f32x8();
3021 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_f32x8()))
3022 }
3023}
3024
3025/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3026///
3027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_ps&expand=3765)
3028#[inline]
3029#[target_feature(enable = "avx512f,avx512vl")]
3030#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3031#[cfg_attr(test, assert_instr(vminps))]
3032pub fn _mm256_maskz_min_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
3033 unsafe {
3034 let min: f32x8 = _mm256_min_ps(a, b).as_f32x8();
3035 transmute(src:simd_select_bitmask(m:k, yes:min, no:f32x8::ZERO))
3036 }
3037}
3038
3039/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3040///
3041/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_ps&expand=3761)
3042#[inline]
3043#[target_feature(enable = "avx512f,avx512vl")]
3044#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3045#[cfg_attr(test, assert_instr(vminps))]
3046pub fn _mm_mask_min_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
3047 unsafe {
3048 let min: f32x4 = _mm_min_ps(a, b).as_f32x4();
3049 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_f32x4()))
3050 }
3051}
3052
3053/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3054///
3055/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_ps&expand=3762)
3056#[inline]
3057#[target_feature(enable = "avx512f,avx512vl")]
3058#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3059#[cfg_attr(test, assert_instr(vminps))]
3060pub fn _mm_maskz_min_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
3061 unsafe {
3062 let min: f32x4 = _mm_min_ps(a, b).as_f32x4();
3063 transmute(src:simd_select_bitmask(m:k, yes:min, no:f32x4::ZERO))
3064 }
3065}
3066
3067/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst.
3068///
3069/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_pd&expand=3759)
3070#[inline]
3071#[target_feature(enable = "avx512f")]
3072#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3073#[cfg_attr(test, assert_instr(vminpd))]
3074pub fn _mm512_min_pd(a: __m512d, b: __m512d) -> __m512d {
3075 unsafe { transmute(src:vminpd(a.as_f64x8(), b.as_f64x8(), _MM_FROUND_CUR_DIRECTION)) }
3076}
3077
3078/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3079///
3080/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_pd&expand=3757)
3081#[inline]
3082#[target_feature(enable = "avx512f")]
3083#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3084#[cfg_attr(test, assert_instr(vminpd))]
3085pub fn _mm512_mask_min_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
3086 unsafe {
3087 let min: f64x8 = _mm512_min_pd(a, b).as_f64x8();
3088 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_f64x8()))
3089 }
3090}
3091
3092/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3093///
3094/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_pd&expand=3758)
3095#[inline]
3096#[target_feature(enable = "avx512f")]
3097#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3098#[cfg_attr(test, assert_instr(vminpd))]
3099pub fn _mm512_maskz_min_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
3100 unsafe {
3101 let min: f64x8 = _mm512_min_pd(a, b).as_f64x8();
3102 transmute(src:simd_select_bitmask(m:k, yes:min, no:f64x8::ZERO))
3103 }
3104}
3105
3106/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3107///
3108/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_pd&expand=3754)
3109#[inline]
3110#[target_feature(enable = "avx512f,avx512vl")]
3111#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3112#[cfg_attr(test, assert_instr(vminpd))]
3113pub fn _mm256_mask_min_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
3114 unsafe {
3115 let min: f64x4 = _mm256_min_pd(a, b).as_f64x4();
3116 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_f64x4()))
3117 }
3118}
3119
3120/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3121///
3122/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_pd&expand=3755)
3123#[inline]
3124#[target_feature(enable = "avx512f,avx512vl")]
3125#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3126#[cfg_attr(test, assert_instr(vminpd))]
3127pub fn _mm256_maskz_min_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
3128 unsafe {
3129 let min: f64x4 = _mm256_min_pd(a, b).as_f64x4();
3130 transmute(src:simd_select_bitmask(m:k, yes:min, no:f64x4::ZERO))
3131 }
3132}
3133
3134/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3135///
3136/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_pd&expand=3751)
3137#[inline]
3138#[target_feature(enable = "avx512f,avx512vl")]
3139#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3140#[cfg_attr(test, assert_instr(vminpd))]
3141pub fn _mm_mask_min_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
3142 unsafe {
3143 let min: f64x2 = _mm_min_pd(a, b).as_f64x2();
3144 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_f64x2()))
3145 }
3146}
3147
3148/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3149///
3150/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_pd&expand=3752)
3151#[inline]
3152#[target_feature(enable = "avx512f,avx512vl")]
3153#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3154#[cfg_attr(test, assert_instr(vminpd))]
3155pub fn _mm_maskz_min_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
3156 unsafe {
3157 let min: f64x2 = _mm_min_pd(a, b).as_f64x2();
3158 transmute(src:simd_select_bitmask(m:k, yes:min, no:f64x2::ZERO))
3159 }
3160}
3161
3162/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst.
3163///
3164/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epu32&expand=3732)
3165#[inline]
3166#[target_feature(enable = "avx512f")]
3167#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3168#[cfg_attr(test, assert_instr(vpminud))]
3169pub fn _mm512_min_epu32(a: __m512i, b: __m512i) -> __m512i {
3170 unsafe {
3171 let a: u32x16 = a.as_u32x16();
3172 let b: u32x16 = b.as_u32x16();
3173 transmute(src:simd_select::<i32x16, _>(mask:simd_lt(a, b), if_true:a, if_false:b))
3174 }
3175}
3176
3177/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3178///
3179/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epu32&expand=3730)
3180#[inline]
3181#[target_feature(enable = "avx512f")]
3182#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3183#[cfg_attr(test, assert_instr(vpminud))]
3184pub fn _mm512_mask_min_epu32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
3185 unsafe {
3186 let min: u32x16 = _mm512_min_epu32(a, b).as_u32x16();
3187 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_u32x16()))
3188 }
3189}
3190
3191/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3192///
3193/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epu32&expand=3731)
3194#[inline]
3195#[target_feature(enable = "avx512f")]
3196#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3197#[cfg_attr(test, assert_instr(vpminud))]
3198pub fn _mm512_maskz_min_epu32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
3199 unsafe {
3200 let min: u32x16 = _mm512_min_epu32(a, b).as_u32x16();
3201 transmute(src:simd_select_bitmask(m:k, yes:min, no:u32x16::ZERO))
3202 }
3203}
3204
3205/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3206///
3207/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epu32&expand=3727)
3208#[inline]
3209#[target_feature(enable = "avx512f,avx512vl")]
3210#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3211#[cfg_attr(test, assert_instr(vpminud))]
3212pub fn _mm256_mask_min_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
3213 unsafe {
3214 let min: u32x8 = _mm256_min_epu32(a, b).as_u32x8();
        transmute(simd_select_bitmask(k, min, src.as_u32x8()))
3216 }
3217}
3218
3219/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3220///
3221/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epu32&expand=3728)
3222#[inline]
3223#[target_feature(enable = "avx512f,avx512vl")]
3224#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3225#[cfg_attr(test, assert_instr(vpminud))]
3226pub fn _mm256_maskz_min_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
3227 unsafe {
3228 let min: u32x8 = _mm256_min_epu32(a, b).as_u32x8();
        transmute(simd_select_bitmask(k, min, u32x8::ZERO))
3230 }
3231}
3232
3233/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3234///
3235/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epu32&expand=3724)
3236#[inline]
3237#[target_feature(enable = "avx512f,avx512vl")]
3238#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3239#[cfg_attr(test, assert_instr(vpminud))]
3240pub fn _mm_mask_min_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
3241 unsafe {
3242 let min: u32x4 = _mm_min_epu32(a, b).as_u32x4();
        transmute(simd_select_bitmask(k, min, src.as_u32x4()))
3244 }
3245}
3246
3247/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3248///
3249/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epu32&expand=3725)
3250#[inline]
3251#[target_feature(enable = "avx512f,avx512vl")]
3252#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3253#[cfg_attr(test, assert_instr(vpminud))]
3254pub fn _mm_maskz_min_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
3255 unsafe {
3256 let min: u32x4 = _mm_min_epu32(a, b).as_u32x4();
        transmute(simd_select_bitmask(k, min, u32x4::ZERO))
3258 }
3259}
3260
3261/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst.
3262///
3263/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epu64&expand=3741)
3264#[inline]
3265#[target_feature(enable = "avx512f")]
3266#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3267#[cfg_attr(test, assert_instr(vpminuq))]
3268pub fn _mm512_min_epu64(a: __m512i, b: __m512i) -> __m512i {
3269 unsafe {
3270 let a: u64x8 = a.as_u64x8();
3271 let b: u64x8 = b.as_u64x8();
        transmute(simd_select::<i64x8, _>(simd_lt(a, b), a, b))
3273 }
3274}
3275
3276/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3277///
3278/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epu64&expand=3739)
3279#[inline]
3280#[target_feature(enable = "avx512f")]
3281#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3282#[cfg_attr(test, assert_instr(vpminuq))]
3283pub fn _mm512_mask_min_epu64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
3284 unsafe {
3285 let min: u64x8 = _mm512_min_epu64(a, b).as_u64x8();
        transmute(simd_select_bitmask(k, min, src.as_u64x8()))
3287 }
3288}
3289
3290/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3291///
3292/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epu64&expand=3740)
3293#[inline]
3294#[target_feature(enable = "avx512f")]
3295#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3296#[cfg_attr(test, assert_instr(vpminuq))]
3297pub fn _mm512_maskz_min_epu64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
3298 unsafe {
3299 let min: u64x8 = _mm512_min_epu64(a, b).as_u64x8();
        transmute(simd_select_bitmask(k, min, u64x8::ZERO))
3301 }
3302}
3303
3304/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst.
3305///
3306/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epu64&expand=3738)
3307#[inline]
3308#[target_feature(enable = "avx512f,avx512vl")]
3309#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3310#[cfg_attr(test, assert_instr(vpminuq))]
3311pub fn _mm256_min_epu64(a: __m256i, b: __m256i) -> __m256i {
3312 unsafe {
3313 let a: u64x4 = a.as_u64x4();
3314 let b: u64x4 = b.as_u64x4();
        transmute(simd_select::<i64x4, _>(simd_lt(a, b), a, b))
3316 }
3317}
3318
3319/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3320///
3321/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epu64&expand=3736)
3322#[inline]
3323#[target_feature(enable = "avx512f,avx512vl")]
3324#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3325#[cfg_attr(test, assert_instr(vpminuq))]
3326pub fn _mm256_mask_min_epu64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
3327 unsafe {
3328 let min: u64x4 = _mm256_min_epu64(a, b).as_u64x4();
        transmute(simd_select_bitmask(k, min, src.as_u64x4()))
3330 }
3331}
3332
3333/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3334///
3335/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epu64&expand=3737)
3336#[inline]
3337#[target_feature(enable = "avx512f,avx512vl")]
3338#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3339#[cfg_attr(test, assert_instr(vpminuq))]
3340pub fn _mm256_maskz_min_epu64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
3341 unsafe {
3342 let min: u64x4 = _mm256_min_epu64(a, b).as_u64x4();
        transmute(simd_select_bitmask(k, min, u64x4::ZERO))
3344 }
3345}
3346
3347/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst.
3348///
3349/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epu64&expand=3735)
3350#[inline]
3351#[target_feature(enable = "avx512f,avx512vl")]
3352#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3353#[cfg_attr(test, assert_instr(vpminuq))]
3354pub fn _mm_min_epu64(a: __m128i, b: __m128i) -> __m128i {
3355 unsafe {
3356 let a: u64x2 = a.as_u64x2();
3357 let b: u64x2 = b.as_u64x2();
        transmute(simd_select::<i64x2, _>(simd_lt(a, b), a, b))
3359 }
3360}
3361
3362/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3363///
3364/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epu64&expand=3733)
3365#[inline]
3366#[target_feature(enable = "avx512f,avx512vl")]
3367#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3368#[cfg_attr(test, assert_instr(vpminuq))]
3369pub fn _mm_mask_min_epu64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
3370 unsafe {
3371 let min: u64x2 = _mm_min_epu64(a, b).as_u64x2();
        transmute(simd_select_bitmask(k, min, src.as_u64x2()))
3373 }
3374}
3375
3376/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3377///
3378/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epu64&expand=3734)
3379#[inline]
3380#[target_feature(enable = "avx512f,avx512vl")]
3381#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3382#[cfg_attr(test, assert_instr(vpminuq))]
3383pub fn _mm_maskz_min_epu64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
3384 unsafe {
3385 let min: u64x2 = _mm_min_epu64(a, b).as_u64x2();
        transmute(simd_select_bitmask(k, min, u64x2::ZERO))
3387 }
3388}
3389
3390/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst.
3391///
3392/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_ps&expand=5371)
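///
/// A minimal usage sketch with illustrative values (AVX-512F support is assumed
/// to have been verified at runtime, making the `unsafe` block sound):
///
/// ```ignore
/// let mut out = [0.0f32; 16];
/// unsafe {
///     let a = _mm512_set1_ps(9.0);
///     let r = _mm512_sqrt_ps(a); // every lane becomes 3.0
///     _mm512_storeu_ps(out.as_mut_ptr(), r);
/// }
/// assert_eq!(out, [3.0f32; 16]);
/// ```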
3393#[inline]
3394#[target_feature(enable = "avx512f")]
3395#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3396#[cfg_attr(test, assert_instr(vsqrtps))]
3397pub fn _mm512_sqrt_ps(a: __m512) -> __m512 {
3398 unsafe { simd_fsqrt(a) }
3399}
3400
3401/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3402///
3403/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_ps&expand=5369)
3404#[inline]
3405#[target_feature(enable = "avx512f")]
3406#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3407#[cfg_attr(test, assert_instr(vsqrtps))]
3408pub fn _mm512_mask_sqrt_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
3410}
3411
3412/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3413///
3414/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_ps&expand=5370)
3415#[inline]
3416#[target_feature(enable = "avx512f")]
3417#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3418#[cfg_attr(test, assert_instr(vsqrtps))]
3419pub fn _mm512_maskz_sqrt_ps(k: __mmask16, a: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm512_setzero_ps()) }
3421}
3422
3423/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3424///
3425/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sqrt_ps&expand=5366)
3426#[inline]
3427#[target_feature(enable = "avx512f,avx512vl")]
3428#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3429#[cfg_attr(test, assert_instr(vsqrtps))]
3430pub fn _mm256_mask_sqrt_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
3432}
3433
3434/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3435///
3436/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sqrt_ps&expand=5367)
3437#[inline]
3438#[target_feature(enable = "avx512f,avx512vl")]
3439#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3440#[cfg_attr(test, assert_instr(vsqrtps))]
3441pub fn _mm256_maskz_sqrt_ps(k: __mmask8, a: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm256_setzero_ps()) }
3443}
3444
3445/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3446///
3447/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sqrt_ps&expand=5363)
3448#[inline]
3449#[target_feature(enable = "avx512f,avx512vl")]
3450#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3451#[cfg_attr(test, assert_instr(vsqrtps))]
3452pub fn _mm_mask_sqrt_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
3454}
3455
3456/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3457///
3458/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sqrt_ps&expand=5364)
3459#[inline]
3460#[target_feature(enable = "avx512f,avx512vl")]
3461#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3462#[cfg_attr(test, assert_instr(vsqrtps))]
3463pub fn _mm_maskz_sqrt_ps(k: __mmask8, a: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm_setzero_ps()) }
3465}
3466
3467/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst.
3468///
3469/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_pd&expand=5362)
3470#[inline]
3471#[target_feature(enable = "avx512f")]
3472#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3473#[cfg_attr(test, assert_instr(vsqrtpd))]
3474pub fn _mm512_sqrt_pd(a: __m512d) -> __m512d {
3475 unsafe { simd_fsqrt(a) }
3476}
3477
3478/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3479///
3480/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_pd&expand=5360)
3481#[inline]
3482#[target_feature(enable = "avx512f")]
3483#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3484#[cfg_attr(test, assert_instr(vsqrtpd))]
3485pub fn _mm512_mask_sqrt_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
3487}
3488
3489/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3490///
3491/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_pd&expand=5361)
3492#[inline]
3493#[target_feature(enable = "avx512f")]
3494#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3495#[cfg_attr(test, assert_instr(vsqrtpd))]
3496pub fn _mm512_maskz_sqrt_pd(k: __mmask8, a: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm512_setzero_pd()) }
3498}
3499
3500/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3501///
3502/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sqrt_pd&expand=5357)
3503#[inline]
3504#[target_feature(enable = "avx512f,avx512vl")]
3505#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3506#[cfg_attr(test, assert_instr(vsqrtpd))]
3507pub fn _mm256_mask_sqrt_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
3509}
3510
3511/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3512///
3513/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sqrt_pd&expand=5358)
3514#[inline]
3515#[target_feature(enable = "avx512f,avx512vl")]
3516#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3517#[cfg_attr(test, assert_instr(vsqrtpd))]
3518pub fn _mm256_maskz_sqrt_pd(k: __mmask8, a: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm256_setzero_pd()) }
3520}
3521
3522/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3523///
3524/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sqrt_pd&expand=5354)
3525#[inline]
3526#[target_feature(enable = "avx512f,avx512vl")]
3527#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3528#[cfg_attr(test, assert_instr(vsqrtpd))]
3529pub fn _mm_mask_sqrt_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
3531}
3532
3533/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3534///
3535/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sqrt_pd&expand=5355)
3536#[inline]
3537#[target_feature(enable = "avx512f,avx512vl")]
3538#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3539#[cfg_attr(test, assert_instr(vsqrtpd))]
3540pub fn _mm_maskz_sqrt_pd(k: __mmask8, a: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm_setzero_pd()) }
3542}
3543
3544/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.
3545///
3546/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_ps&expand=2557)
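///
/// A minimal usage sketch with illustrative values (AVX-512F support is assumed
/// to have been verified at runtime, making the `unsafe` block sound):
///
/// ```ignore
/// let mut out = [0.0f32; 16];
/// unsafe {
///     let a = _mm512_set1_ps(2.0);
///     let b = _mm512_set1_ps(3.0);
///     let c = _mm512_set1_ps(1.0);
///     // Each lane computes a * b + c with a single rounding: 2.0 * 3.0 + 1.0 = 7.0.
///     let r = _mm512_fmadd_ps(a, b, c);
///     _mm512_storeu_ps(out.as_mut_ptr(), r);
/// }
/// assert_eq!(out, [7.0f32; 16]);
/// ```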
3547#[inline]
3548#[target_feature(enable = "avx512f")]
3549#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3550#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3551pub fn _mm512_fmadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_fma(a, b, c) }
3553}
3554
3555/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3556///
3557/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_ps&expand=2558)
3558#[inline]
3559#[target_feature(enable = "avx512f")]
3560#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3561#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3562pub fn _mm512_mask_fmadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fmadd_ps(a, b, c), a) }
3564}
3565
3566/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3567///
3568/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_ps&expand=2560)
3569#[inline]
3570#[target_feature(enable = "avx512f")]
3571#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3572#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3573pub fn _mm512_maskz_fmadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fmadd_ps(a, b, c), _mm512_setzero_ps()) }
3575}
3576
3577/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3578///
3579/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_ps&expand=2559)
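///
/// A minimal sketch of the `mask3` variant with illustrative values (AVX-512F
/// support is assumed to have been verified at runtime, making the `unsafe`
/// block sound); note that masked-off lanes fall back to `c`, not `a`:
///
/// ```ignore
/// let mut out = [0.0f32; 16];
/// unsafe {
///     let a = _mm512_set1_ps(2.0);
///     let b = _mm512_set1_ps(3.0);
///     let c = _mm512_set1_ps(10.0);
///     // Only mask bit 0 is set: lane 0 is 2.0 * 3.0 + 10.0 = 16.0,
///     // the remaining lanes are copied from `c`.
///     let r = _mm512_mask3_fmadd_ps(a, b, c, 0b1);
///     _mm512_storeu_ps(out.as_mut_ptr(), r);
/// }
/// assert_eq!(out[0], 16.0);
/// assert_eq!(out[1..], [10.0f32; 15]);
/// ```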
3580#[inline]
3581#[target_feature(enable = "avx512f")]
3582#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3583#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3584pub fn _mm512_mask3_fmadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fmadd_ps(a, b, c), c) }
3586}
3587
3588/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3589///
3590/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmadd_ps&expand=2554)
3591#[inline]
3592#[target_feature(enable = "avx512f,avx512vl")]
3593#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3594#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3595pub fn _mm256_mask_fmadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fmadd_ps(a, b, c), a) }
3597}
3598
3599/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3600///
3601/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmadd_ps&expand=2556)
3602#[inline]
3603#[target_feature(enable = "avx512f,avx512vl")]
3604#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3605#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3606pub fn _mm256_maskz_fmadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fmadd_ps(a, b, c), _mm256_setzero_ps()) }
3608}
3609
3610/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3611///
3612/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmadd_ps&expand=2555)
3613#[inline]
3614#[target_feature(enable = "avx512f,avx512vl")]
3615#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3616#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3617pub fn _mm256_mask3_fmadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fmadd_ps(a, b, c), c) }
3619}
3620
3621/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3622///
3623/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmadd_ps&expand=2550)
3624#[inline]
3625#[target_feature(enable = "avx512f,avx512vl")]
3626#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3627#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3628pub fn _mm_mask_fmadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fmadd_ps(a, b, c), a) }
3630}
3631
3632/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3633///
3634/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmadd_ps&expand=2552)
3635#[inline]
3636#[target_feature(enable = "avx512f,avx512vl")]
3637#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3638#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3639pub fn _mm_maskz_fmadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fmadd_ps(a, b, c), _mm_setzero_ps()) }
3641}
3642
3643/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3644///
3645/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmadd_ps&expand=2551)
3646#[inline]
3647#[target_feature(enable = "avx512f,avx512vl")]
3648#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3649#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3650pub fn _mm_mask3_fmadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fmadd_ps(a, b, c), c) }
3652}
3653
3654/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.
3655///
3656/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_pd&expand=2545)
3657#[inline]
3658#[target_feature(enable = "avx512f")]
3659#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3660#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3661pub fn _mm512_fmadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_fma(a, b, c) }
3663}
3664
3665/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3666///
3667/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_pd&expand=2546)
3668#[inline]
3669#[target_feature(enable = "avx512f")]
3670#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3671#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3672pub fn _mm512_mask_fmadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fmadd_pd(a, b, c), a) }
3674}
3675
3676/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3677///
3678/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_pd&expand=2548)
3679#[inline]
3680#[target_feature(enable = "avx512f")]
3681#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3682#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3683pub fn _mm512_maskz_fmadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fmadd_pd(a, b, c), _mm512_setzero_pd()) }
3685}
3686
3687/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3688///
3689/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_pd&expand=2547)
3690#[inline]
3691#[target_feature(enable = "avx512f")]
3692#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3693#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3694pub fn _mm512_mask3_fmadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fmadd_pd(a, b, c), c) }
3696}
3697
3698/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3699///
3700/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmadd_pd&expand=2542)
3701#[inline]
3702#[target_feature(enable = "avx512f,avx512vl")]
3703#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3704#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3705pub fn _mm256_mask_fmadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fmadd_pd(a, b, c), a) }
3707}
3708
3709/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3710///
3711/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmadd_pd&expand=2544)
3712#[inline]
3713#[target_feature(enable = "avx512f,avx512vl")]
3714#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3715#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3716pub fn _mm256_maskz_fmadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fmadd_pd(a, b, c), _mm256_setzero_pd()) }
3718}
3719
3720/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3721///
3722/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmadd_pd&expand=2543)
3723#[inline]
3724#[target_feature(enable = "avx512f,avx512vl")]
3725#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3726#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3727pub fn _mm256_mask3_fmadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fmadd_pd(a, b, c), c) }
3729}
3730
3731/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3732///
3733/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmadd_pd&expand=2538)
3734#[inline]
3735#[target_feature(enable = "avx512f,avx512vl")]
3736#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3737#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3738pub fn _mm_mask_fmadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fmadd_pd(a, b, c), a) }
3740}
3741
3742/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3743///
3744/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmadd_pd&expand=2540)
3745#[inline]
3746#[target_feature(enable = "avx512f,avx512vl")]
3747#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3748#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3749pub fn _mm_maskz_fmadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fmadd_pd(a, b, c), _mm_setzero_pd()) }
3751}
3752
3753/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3754///
3755/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmadd_pd&expand=2539)
3756#[inline]
3757#[target_feature(enable = "avx512f,avx512vl")]
3758#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3759#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3760pub fn _mm_mask3_fmadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fmadd_pd(a, b, c), c) }
3762}
3763
3764/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.
3765///
3766/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_ps&expand=2643)
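///
/// A minimal usage sketch with illustrative values (AVX-512F support is assumed
/// to have been verified at runtime, making the `unsafe` block sound):
///
/// ```ignore
/// let mut out = [0.0f32; 16];
/// unsafe {
///     let a = _mm512_set1_ps(2.0);
///     let b = _mm512_set1_ps(3.0);
///     let c = _mm512_set1_ps(1.0);
///     // Each lane computes a * b - c: 2.0 * 3.0 - 1.0 = 5.0.
///     let r = _mm512_fmsub_ps(a, b, c);
///     _mm512_storeu_ps(out.as_mut_ptr(), r);
/// }
/// assert_eq!(out, [5.0f32; 16]);
/// ```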
3767#[inline]
3768#[target_feature(enable = "avx512f")]
3769#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
pub fn _mm512_fmsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_fma(a, b, simd_neg(c)) }
3773}
3774
3775/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3776///
3777/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_ps&expand=2644)
3778#[inline]
3779#[target_feature(enable = "avx512f")]
3780#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
pub fn _mm512_mask_fmsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fmsub_ps(a, b, c), a) }
3784}
3785
3786/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3787///
3788/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_ps&expand=2646)
3789#[inline]
3790#[target_feature(enable = "avx512f")]
3791#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
pub fn _mm512_maskz_fmsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fmsub_ps(a, b, c), _mm512_setzero_ps()) }
3795}
3796
3797/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3798///
3799/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_ps&expand=2645)
3800#[inline]
3801#[target_feature(enable = "avx512f")]
3802#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
pub fn _mm512_mask3_fmsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fmsub_ps(a, b, c), c) }
3806}
3807
3808/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3809///
3810/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsub_ps&expand=2640)
3811#[inline]
3812#[target_feature(enable = "avx512f,avx512vl")]
3813#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
pub fn _mm256_mask_fmsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fmsub_ps(a, b, c), a) }
3817}
3818
3819/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3820///
3821/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsub_ps&expand=2642)
3822#[inline]
3823#[target_feature(enable = "avx512f,avx512vl")]
3824#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
pub fn _mm256_maskz_fmsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fmsub_ps(a, b, c), _mm256_setzero_ps()) }
3828}
3829
3830/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3831///
3832/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsub_ps&expand=2641)
3833#[inline]
3834#[target_feature(enable = "avx512f,avx512vl")]
3835#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
pub fn _mm256_mask3_fmsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fmsub_ps(a, b, c), c) }
3839}
3840
3841/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3842///
3843/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsub_ps&expand=2636)
3844#[inline]
3845#[target_feature(enable = "avx512f,avx512vl")]
3846#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
pub fn _mm_mask_fmsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fmsub_ps(a, b, c), a) }
3850}
3851
3852/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3853///
3854/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsub_ps&expand=2638)
3855#[inline]
3856#[target_feature(enable = "avx512f,avx512vl")]
3857#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
pub fn _mm_maskz_fmsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fmsub_ps(a, b, c), _mm_setzero_ps()) }
3861}
3862
3863/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3864///
3865/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsub_ps&expand=2637)
3866#[inline]
3867#[target_feature(enable = "avx512f,avx512vl")]
3868#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
pub fn _mm_mask3_fmsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fmsub_ps(a, b, c), c) }
3872}
3873
3874/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.
3875///
3876/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_pd&expand=2631)
3877#[inline]
3878#[target_feature(enable = "avx512f")]
3879#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3880#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3881pub fn _mm512_fmsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_fma(a, b, simd_neg(c)) }
3883}
3884
3885/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3886///
3887/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_pd&expand=2632)
3888#[inline]
3889#[target_feature(enable = "avx512f")]
3890#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3891#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3892pub fn _mm512_mask_fmsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fmsub_pd(a, b, c), a) }
3894}
3895
3896/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3897///
3898/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_pd&expand=2634)
3899#[inline]
3900#[target_feature(enable = "avx512f")]
3901#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3902#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3903pub fn _mm512_maskz_fmsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fmsub_pd(a, b, c), _mm512_setzero_pd()) }
3905}
3906
3907/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3908///
3909/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_pd&expand=2633)
3910#[inline]
3911#[target_feature(enable = "avx512f")]
3912#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3913#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3914pub fn _mm512_mask3_fmsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fmsub_pd(a, b, c), c) }
3916}
3917
3918/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3919///
3920/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsub_pd&expand=2628)
3921#[inline]
3922#[target_feature(enable = "avx512f,avx512vl")]
3923#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3924#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3925pub fn _mm256_mask_fmsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fmsub_pd(a, b, c), a) }
3927}
3928
3929/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3930///
3931/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsub_pd&expand=2630)
3932#[inline]
3933#[target_feature(enable = "avx512f,avx512vl")]
3934#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3935#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3936pub fn _mm256_maskz_fmsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fmsub_pd(a, b, c), _mm256_setzero_pd()) }
3938}
3939
3940/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3941///
3942/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsub_pd&expand=2629)
3943#[inline]
3944#[target_feature(enable = "avx512f,avx512vl")]
3945#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3946#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3947pub fn _mm256_mask3_fmsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fmsub_pd(a, b, c), c) }
3949}
3950
3951/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3952///
3953/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsub_pd&expand=2624)
3954#[inline]
3955#[target_feature(enable = "avx512f,avx512vl")]
3956#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3957#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3958pub fn _mm_mask_fmsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fmsub_pd(a, b, c), a) }
3960}
3961
3962/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3963///
3964/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsub_pd&expand=2626)
3965#[inline]
3966#[target_feature(enable = "avx512f,avx512vl")]
3967#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3968#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3969pub fn _mm_maskz_fmsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fmsub_pd(a, b, c), _mm_setzero_pd()) }
3971}
3972
3973/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3974///
3975/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsub_pd&expand=2625)
3976#[inline]
3977#[target_feature(enable = "avx512f,avx512vl")]
3978#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3979#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3980pub fn _mm_mask3_fmsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fmsub_pd(a, b, c), c) }
3982}
3983
3984/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.
3985///
3986/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_ps&expand=2611)
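///
/// A minimal usage sketch with illustrative values (AVX-512F support is assumed
/// to have been verified at runtime, making the `unsafe` block sound);
/// even-indexed lanes subtract `c`, odd-indexed lanes add it:
///
/// ```ignore
/// let mut out = [0.0f32; 16];
/// unsafe {
///     let a = _mm512_set1_ps(2.0);
///     let b = _mm512_set1_ps(3.0);
///     let c = _mm512_set1_ps(1.0);
///     // Even lanes: 2.0 * 3.0 - 1.0 = 5.0; odd lanes: 2.0 * 3.0 + 1.0 = 7.0.
///     let r = _mm512_fmaddsub_ps(a, b, c);
///     _mm512_storeu_ps(out.as_mut_ptr(), r);
/// }
/// for (i, x) in out.iter().enumerate() {
///     assert_eq!(*x, if i % 2 == 0 { 5.0 } else { 7.0 });
/// }
/// ```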
3987#[inline]
3988#[target_feature(enable = "avx512f")]
3989#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3990#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
3991pub fn _mm512_fmaddsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
3992 unsafe {
        let add: __m512 = simd_fma(a, b, c);
        let sub: __m512 = simd_fma(a, b, simd_neg(c));
3995 simd_shuffle!(
3996 add,
3997 sub,
3998 [16, 1, 18, 3, 20, 5, 22, 7, 24, 9, 26, 11, 28, 13, 30, 15]
3999 )
4000 }
4001}
4002
4003/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4004///
4005/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_ps&expand=2612)
4006#[inline]
4007#[target_feature(enable = "avx512f")]
4008#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4009#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4010pub fn _mm512_mask_fmaddsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ps(a, b, c), a) }
4012}
4013
4014/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4015///
4016/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_ps&expand=2614)
4017#[inline]
4018#[target_feature(enable = "avx512f")]
4019#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4020#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4021pub fn _mm512_maskz_fmaddsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ps(a, b, c), _mm512_setzero_ps()) }
4023}
4024
4025/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4026///
4027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_ps&expand=2613)
4028#[inline]
4029#[target_feature(enable = "avx512f")]
4030#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4031#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4032pub fn _mm512_mask3_fmaddsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ps(a, b, c), c) }
4034}
4035
4036/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4037///
4038/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmaddsub_ps&expand=2608)
4039#[inline]
4040#[target_feature(enable = "avx512f,avx512vl")]
4041#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4042#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4043pub fn _mm256_mask_fmaddsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ps(a, b, c), a) }
4045}
4046
4047/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4048///
4049/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmaddsub_ps&expand=2610)
4050#[inline]
4051#[target_feature(enable = "avx512f,avx512vl")]
4052#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4053#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4054pub fn _mm256_maskz_fmaddsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ps(a, b, c), _mm256_setzero_ps()) }
4056}
4057
4058/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4059///
4060/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmaddsub_ps&expand=2609)
4061#[inline]
4062#[target_feature(enable = "avx512f,avx512vl")]
4063#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4064#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4065pub fn _mm256_mask3_fmaddsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
4066 unsafe { simd_select_bitmask(m:k, yes:_mm256_fmaddsub_ps(a, b, c), no:c) }
4067}
4068
4069/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4070///
4071/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmaddsub_ps&expand=2604)
4072#[inline]
4073#[target_feature(enable = "avx512f,avx512vl")]
4074#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4075#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4076pub fn _mm_mask_fmaddsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
4077 unsafe { simd_select_bitmask(m:k, yes:_mm_fmaddsub_ps(a, b, c), no:a) }
4078}
4079
4080/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4081///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmaddsub_ps&expand=2606)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
pub fn _mm_maskz_fmaddsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fmaddsub_ps(a, b, c), _mm_setzero_ps()) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmaddsub_ps&expand=2605)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
pub fn _mm_mask3_fmaddsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fmaddsub_ps(a, b, c), c) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_pd&expand=2599)
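///
/// # Example
///
/// A minimal, illustrative sketch of the lane pattern (even lanes compute `a * b - c`,
/// odd lanes `a * b + c`). The `demo` helper is hypothetical, and the snippet assumes
/// a nightly toolchain with `stdarch_x86_avx512` plus an AVX-512F-capable CPU, so it
/// is not compiled here:
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm512_set1_pd(2.0);
///     let b = _mm512_set1_pd(3.0);
///     let c = _mm512_set1_pd(1.0);
///     let r = _mm512_fmaddsub_pd(a, b, c);
///     let mut out = [0.0f64; 8];
///     _mm512_storeu_pd(out.as_mut_ptr(), r);
///     // Even lanes: 2*3 - 1 = 5; odd lanes: 2*3 + 1 = 7.
///     assert_eq!(out, [5.0, 7.0, 5.0, 7.0, 5.0, 7.0, 5.0, 7.0]);
/// }
/// ```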
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
pub fn _mm512_fmaddsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe {
        let add: __m512d = simd_fma(a, b, c);
        let sub: __m512d = simd_fma(a, b, simd_neg(c));
        simd_shuffle!(add, sub, [8, 1, 10, 3, 12, 5, 14, 7])
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_pd&expand=2600)
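///
/// # Example
///
/// A minimal, illustrative sketch of the writemask behaviour (lanes whose mask bit is
/// clear keep the value from `a`). The `demo` helper is hypothetical, and the snippet
/// assumes a nightly toolchain with `stdarch_x86_avx512` plus an AVX-512F-capable CPU,
/// so it is not compiled here:
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm512_setr_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
///     let b = _mm512_set1_pd(10.0);
///     let c = _mm512_set1_pd(1.0);
///     // Only lanes 0 and 1 are computed; lanes 2..8 keep the values from `a`.
///     let r = _mm512_mask_fmaddsub_pd(a, 0b0000_0011, b, c);
///     let mut out = [0.0f64; 8];
///     _mm512_storeu_pd(out.as_mut_ptr(), r);
///     assert_eq!(out, [9.0, 21.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]);
/// }
/// ```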
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
pub fn _mm512_mask_fmaddsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_pd(a, b, c), a) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_pd&expand=2602)
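///
/// # Example
///
/// A minimal, illustrative sketch of the zeromask behaviour (lanes whose mask bit is
/// clear are set to zero). The `demo` helper is hypothetical, and the snippet assumes
/// a nightly toolchain with `stdarch_x86_avx512` plus an AVX-512F-capable CPU, so it
/// is not compiled here:
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm512_set1_pd(2.0);
///     let b = _mm512_set1_pd(3.0);
///     let c = _mm512_set1_pd(1.0);
///     // Only lanes 0 and 1 are computed; lanes 2..8 are zeroed.
///     let r = _mm512_maskz_fmaddsub_pd(0b0000_0011, a, b, c);
///     let mut out = [0.0f64; 8];
///     _mm512_storeu_pd(out.as_mut_ptr(), r);
///     assert_eq!(out, [5.0, 7.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]);
/// }
/// ```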
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
pub fn _mm512_maskz_fmaddsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_pd(a, b, c), _mm512_setzero_pd()) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_pd&expand=2613)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
pub fn _mm512_mask3_fmaddsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_pd(a, b, c), c) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmaddsub_pd&expand=2596)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
pub fn _mm256_mask_fmaddsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_pd(a, b, c), a) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmaddsub_pd&expand=2598)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
pub fn _mm256_maskz_fmaddsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_pd(a, b, c), _mm256_setzero_pd()) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmaddsub_pd&expand=2597)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
pub fn _mm256_mask3_fmaddsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_pd(a, b, c), c) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmaddsub_pd&expand=2592)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
pub fn _mm_mask_fmaddsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fmaddsub_pd(a, b, c), a) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmaddsub_pd&expand=2594)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
pub fn _mm_maskz_fmaddsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fmaddsub_pd(a, b, c), _mm_setzero_pd()) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmaddsub_pd&expand=2593)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
pub fn _mm_mask3_fmaddsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fmaddsub_pd(a, b, c), c) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_ps&expand=2691)
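///
/// # Example
///
/// A minimal, illustrative sketch of the lane pattern (even lanes compute `a * b + c`,
/// odd lanes `a * b - c`). The `demo` helper is hypothetical, and the snippet assumes
/// a nightly toolchain with `stdarch_x86_avx512` plus an AVX-512F-capable CPU, so it
/// is not compiled here:
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm512_set1_ps(2.0);
///     let b = _mm512_set1_ps(3.0);
///     let c = _mm512_set1_ps(1.0);
///     let r = _mm512_fmsubadd_ps(a, b, c);
///     let mut out = [0.0f32; 16];
///     _mm512_storeu_ps(out.as_mut_ptr(), r);
///     // Even lanes: 2*3 + 1 = 7; odd lanes: 2*3 - 1 = 5.
///     assert_eq!(&out[..4], &[7.0, 5.0, 7.0, 5.0]);
/// }
/// ```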
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
pub fn _mm512_fmsubadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
    unsafe {
        let add: __m512 = simd_fma(a, b, c);
        let sub: __m512 = simd_fma(a, b, simd_neg(c));
        simd_shuffle!(
            add,
            sub,
            [0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31]
        )
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_ps&expand=2692)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
pub fn _mm512_mask_fmsubadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ps(a, b, c), a) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_ps&expand=2694)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
pub fn _mm512_maskz_fmsubadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ps(a, b, c), _mm512_setzero_ps()) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_ps&expand=2693)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
pub fn _mm512_mask3_fmsubadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ps(a, b, c), c) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsubadd_ps&expand=2688)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
pub fn _mm256_mask_fmsubadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ps(a, b, c), a) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsubadd_ps&expand=2690)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
pub fn _mm256_maskz_fmsubadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ps(a, b, c), _mm256_setzero_ps()) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsubadd_ps&expand=2689)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
pub fn _mm256_mask3_fmsubadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ps(a, b, c), c) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsubadd_ps&expand=2684)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
pub fn _mm_mask_fmsubadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fmsubadd_ps(a, b, c), a) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsubadd_ps&expand=2686)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
pub fn _mm_maskz_fmsubadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fmsubadd_ps(a, b, c), _mm_setzero_ps()) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsubadd_ps&expand=2685)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
pub fn _mm_mask3_fmsubadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fmsubadd_ps(a, b, c), c) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_pd&expand=2679)
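///
/// # Example
///
/// A minimal, illustrative sketch of the double-precision lane pattern (even lanes
/// compute `a * b + c`, odd lanes `a * b - c`). The `demo` helper is hypothetical, and
/// the snippet assumes a nightly toolchain with `stdarch_x86_avx512` plus an
/// AVX-512F-capable CPU, so it is not compiled here:
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm512_set1_pd(2.0);
///     let b = _mm512_set1_pd(3.0);
///     let c = _mm512_set1_pd(1.0);
///     let r = _mm512_fmsubadd_pd(a, b, c);
///     let mut out = [0.0f64; 8];
///     _mm512_storeu_pd(out.as_mut_ptr(), r);
///     // Even lanes: 2*3 + 1 = 7; odd lanes: 2*3 - 1 = 5.
///     assert_eq!(out, [7.0, 5.0, 7.0, 5.0, 7.0, 5.0, 7.0, 5.0]);
/// }
/// ```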
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
pub fn _mm512_fmsubadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe {
        let add: __m512d = simd_fma(a, b, c);
        let sub: __m512d = simd_fma(a, b, simd_neg(c));
        simd_shuffle!(add, sub, [0, 9, 2, 11, 4, 13, 6, 15])
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_pd&expand=2680)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
pub fn _mm512_mask_fmsubadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_pd(a, b, c), a) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_pd&expand=2682)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
pub fn _mm512_maskz_fmsubadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_pd(a, b, c), _mm512_setzero_pd()) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_pd&expand=2681)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
pub fn _mm512_mask3_fmsubadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_pd(a, b, c), c) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsubadd_pd&expand=2676)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
pub fn _mm256_mask_fmsubadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_pd(a, b, c), a) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsubadd_pd&expand=2678)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
pub fn _mm256_maskz_fmsubadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_pd(a, b, c), _mm256_setzero_pd()) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsubadd_pd&expand=2677)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
pub fn _mm256_mask3_fmsubadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_pd(a, b, c), c) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsubadd_pd&expand=2672)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
pub fn _mm_mask_fmsubadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fmsubadd_pd(a, b, c), a) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsubadd_pd&expand=2674)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
pub fn _mm_maskz_fmsubadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fmsubadd_pd(a, b, c), _mm_setzero_pd()) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsubadd_pd&expand=2673)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
pub fn _mm_mask3_fmsubadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fmsubadd_pd(a, b, c), c) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_ps&expand=2723)
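///
/// # Example
///
/// A minimal, illustrative sketch: every lane computes `-(a * b) + c`. The `demo`
/// helper is hypothetical, and the snippet assumes a nightly toolchain with
/// `stdarch_x86_avx512` plus an AVX-512F-capable CPU, so it is not compiled here:
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm512_set1_ps(2.0);
///     let b = _mm512_set1_ps(3.0);
///     let c = _mm512_set1_ps(10.0);
///     let r = _mm512_fnmadd_ps(a, b, c);
///     let mut out = [0.0f32; 16];
///     _mm512_storeu_ps(out.as_mut_ptr(), r);
///     // Every lane: -(2*3) + 10 = 4.
///     assert!(out.iter().all(|&x| x == 4.0));
/// }
/// ```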
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
pub fn _mm512_fnmadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_fma(simd_neg(a), b, c) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_ps&expand=2724)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
pub fn _mm512_mask_fnmadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fnmadd_ps(a, b, c), a) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_ps&expand=2726)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
pub fn _mm512_maskz_fnmadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fnmadd_ps(a, b, c), _mm512_setzero_ps()) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_ps&expand=2725)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
pub fn _mm512_mask3_fnmadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fnmadd_ps(a, b, c), c) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmadd_ps&expand=2720)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
pub fn _mm256_mask_fnmadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fnmadd_ps(a, b, c), a) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmadd_ps&expand=2722)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
pub fn _mm256_maskz_fnmadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fnmadd_ps(a, b, c), _mm256_setzero_ps()) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmadd_ps&expand=2721)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
pub fn _mm256_mask3_fnmadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fnmadd_ps(a, b, c), c) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmadd_ps&expand=2716)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
pub fn _mm_mask_fnmadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fnmadd_ps(a, b, c), a) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmadd_ps&expand=2718)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
pub fn _mm_maskz_fnmadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fnmadd_ps(a, b, c), _mm_setzero_ps()) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmadd_ps&expand=2717)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
pub fn _mm_mask3_fnmadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fnmadd_ps(a, b, c), c) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_pd&expand=2711)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
pub fn _mm512_fnmadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_fma(simd_neg(a), b, c) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_pd&expand=2712)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
pub fn _mm512_mask_fnmadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fnmadd_pd(a, b, c), a) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_pd&expand=2714)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
pub fn _mm512_maskz_fnmadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fnmadd_pd(a, b, c), _mm512_setzero_pd()) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_pd&expand=2713)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
pub fn _mm512_mask3_fnmadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fnmadd_pd(a, b, c), c) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmadd_pd&expand=2708)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
pub fn _mm256_mask_fnmadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fnmadd_pd(a, b, c), a) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmadd_pd&expand=2710)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
pub fn _mm256_maskz_fnmadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fnmadd_pd(a, b, c), _mm256_setzero_pd()) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmadd_pd&expand=2709)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
pub fn _mm256_mask3_fnmadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fnmadd_pd(a, b, c), c) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmadd_pd&expand=2704)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
pub fn _mm_mask_fnmadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fnmadd_pd(a, b, c), a) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmadd_pd&expand=2706)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
pub fn _mm_maskz_fnmadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fnmadd_pd(a, b, c), _mm_setzero_pd()) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmadd_pd&expand=2705)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
pub fn _mm_mask3_fnmadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fnmadd_pd(a, b, c), c) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_ps&expand=2771)
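///
/// # Example
///
/// A minimal, illustrative sketch: every lane computes `-(a * b) - c`. The `demo`
/// helper is hypothetical, and the snippet assumes a nightly toolchain with
/// `stdarch_x86_avx512` plus an AVX-512F-capable CPU, so it is not compiled here:
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm512_set1_ps(2.0);
///     let b = _mm512_set1_ps(3.0);
///     let c = _mm512_set1_ps(1.0);
///     let r = _mm512_fnmsub_ps(a, b, c);
///     let mut out = [0.0f32; 16];
///     _mm512_storeu_ps(out.as_mut_ptr(), r);
///     // Every lane: -(2*3) - 1 = -7.
///     assert!(out.iter().all(|&x| x == -7.0));
/// }
/// ```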
4671#[inline]
4672#[target_feature(enable = "avx512f")]
4673#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4674#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4675pub fn _mm512_fnmsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
4676 unsafe { simd_fma(x:simd_neg(a), y:b, z:simd_neg(c)) }
4677}
4678
4679/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4680///
4681/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_ps&expand=2772)
4682#[inline]
4683#[target_feature(enable = "avx512f")]
4684#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4685#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4686pub fn _mm512_mask_fnmsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
4687 unsafe { simd_select_bitmask(m:k, yes:_mm512_fnmsub_ps(a, b, c), no:a) }
4688}
4689
4690/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4691///
4692/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_ps&expand=2774)
4693#[inline]
4694#[target_feature(enable = "avx512f")]
4695#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4696#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4697pub fn _mm512_maskz_fnmsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
4698 unsafe { simd_select_bitmask(m:k, yes:_mm512_fnmsub_ps(a, b, c), no:_mm512_setzero_ps()) }
4699}
4700
4701/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4702///
4703/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_ps&expand=2773)
4704#[inline]
4705#[target_feature(enable = "avx512f")]
4706#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4707#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4708pub fn _mm512_mask3_fnmsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
4709 unsafe { simd_select_bitmask(m:k, yes:_mm512_fnmsub_ps(a, b, c), no:c) }
4710}
4711
4712/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4713///
4714/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmsub_ps&expand=2768)
4715#[inline]
4716#[target_feature(enable = "avx512f,avx512vl")]
4717#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4718#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4719pub fn _mm256_mask_fnmsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
4720 unsafe { simd_select_bitmask(m:k, yes:_mm256_fnmsub_ps(a, b, c), no:a) }
4721}
4722
4723/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4724///
4725/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmsub_ps&expand=2770)
4726#[inline]
4727#[target_feature(enable = "avx512f,avx512vl")]
4728#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4729#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4730pub fn _mm256_maskz_fnmsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
4731 unsafe { simd_select_bitmask(m:k, yes:_mm256_fnmsub_ps(a, b, c), no:_mm256_setzero_ps()) }
4732}
4733
4734/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4735///
4736/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmsub_ps&expand=2769)
4737#[inline]
4738#[target_feature(enable = "avx512f,avx512vl")]
4739#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4740#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4741pub fn _mm256_mask3_fnmsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
4742 unsafe { simd_select_bitmask(m:k, yes:_mm256_fnmsub_ps(a, b, c), no:c) }
4743}
4744
4745/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4746///
4747/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmsub_ps&expand=2764)
4748#[inline]
4749#[target_feature(enable = "avx512f,avx512vl")]
4750#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4751#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4752pub fn _mm_mask_fnmsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
4753 unsafe { simd_select_bitmask(m:k, yes:_mm_fnmsub_ps(a, b, c), no:a) }
4754}
4755
4756/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4757///
4758/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmsub_ps&expand=2766)
4759#[inline]
4760#[target_feature(enable = "avx512f,avx512vl")]
4761#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4762#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4763pub fn _mm_maskz_fnmsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
4764 unsafe { simd_select_bitmask(m:k, yes:_mm_fnmsub_ps(a, b, c), no:_mm_setzero_ps()) }
4765}
4766
4767/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4768///
4769/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmsub_ps&expand=2765)
4770#[inline]
4771#[target_feature(enable = "avx512f,avx512vl")]
4772#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4773#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4774pub fn _mm_mask3_fnmsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
4775 unsafe { simd_select_bitmask(m:k, yes:_mm_fnmsub_ps(a, b, c), no:c) }
4776}
4777
4778/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.
4779///
4780/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_pd&expand=2759)
4781#[inline]
4782#[target_feature(enable = "avx512f")]
4783#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4784#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4785pub fn _mm512_fnmsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4786 unsafe { simd_fma(x:simd_neg(a), y:b, z:simd_neg(c)) }
4787}
4788
4789/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4790///
4791/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_pd&expand=2760)
4792#[inline]
4793#[target_feature(enable = "avx512f")]
4794#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4795#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4796pub fn _mm512_mask_fnmsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
4797 unsafe { simd_select_bitmask(m:k, yes:_mm512_fnmsub_pd(a, b, c), no:a) }
4798}
4799
4800/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4801///
4802/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_pd&expand=2762)
4803#[inline]
4804#[target_feature(enable = "avx512f")]
4805#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4806#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4807pub fn _mm512_maskz_fnmsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4808 unsafe { simd_select_bitmask(m:k, yes:_mm512_fnmsub_pd(a, b, c), no:_mm512_setzero_pd()) }
4809}
4810
4811/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4812///
4813/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_pd&expand=2761)
4814#[inline]
4815#[target_feature(enable = "avx512f")]
4816#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4817#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4818pub fn _mm512_mask3_fnmsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
4819 unsafe { simd_select_bitmask(m:k, yes:_mm512_fnmsub_pd(a, b, c), no:c) }
4820}
4821
4822/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4823///
4824/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmsub_pd&expand=2756)
4825#[inline]
4826#[target_feature(enable = "avx512f,avx512vl")]
4827#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4828#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4829pub fn _mm256_mask_fnmsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
4830 unsafe { simd_select_bitmask(k, _mm256_fnmsub_pd(a, b, c), a) }
4831}
4832
4833/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4834///
4835/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmsub_pd&expand=2758)
4836#[inline]
4837#[target_feature(enable = "avx512f,avx512vl")]
4838#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4839#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4840pub fn _mm256_maskz_fnmsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
4841 unsafe { simd_select_bitmask(k, _mm256_fnmsub_pd(a, b, c), _mm256_setzero_pd()) }
4842}
4843
4844/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4845///
4846/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmsub_pd&expand=2757)
4847#[inline]
4848#[target_feature(enable = "avx512f,avx512vl")]
4849#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4850#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4851pub fn _mm256_mask3_fnmsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
4852 unsafe { simd_select_bitmask(k, _mm256_fnmsub_pd(a, b, c), c) }
4853}
4854
4855/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4856///
4857/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmsub_pd&expand=2752)
4858#[inline]
4859#[target_feature(enable = "avx512f,avx512vl")]
4860#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4861#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4862pub fn _mm_mask_fnmsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
4863 unsafe { simd_select_bitmask(k, _mm_fnmsub_pd(a, b, c), a) }
4864}
4865
4866/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4867///
4868/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmsub_pd&expand=2754)
4869#[inline]
4870#[target_feature(enable = "avx512f,avx512vl")]
4871#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4872#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4873pub fn _mm_maskz_fnmsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
4874 unsafe { simd_select_bitmask(k, _mm_fnmsub_pd(a, b, c), _mm_setzero_pd()) }
4875}
4876
4877/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4878///
4879/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmsub_pd&expand=2753)
4880#[inline]
4881#[target_feature(enable = "avx512f,avx512vl")]
4882#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4883#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4884pub fn _mm_mask3_fnmsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
4885 unsafe { simd_select_bitmask(k, _mm_fnmsub_pd(a, b, c), c) }
4886}
4887
4888/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
4889///
4890/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rcp14_ps&expand=4502)
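///
/// # Example
///
/// A minimal sketch (assumes `avx512f`; marked `ignore`): the result is only an
/// approximation, so it is compared against the exact reciprocal with a 2^-14
/// relative tolerance rather than for equality.
///
/// ```ignore
/// let a = _mm512_set1_ps(4.0);
/// let r = _mm512_rcp14_ps(a);
/// let mut out = [0.0f32; 16];
/// _mm512_storeu_ps(out.as_mut_ptr(), r);
/// // Each lane approximates 1.0 / 4.0 = 0.25 to within a relative error of 2^-14.
/// assert!((out[0] - 0.25).abs() <= 0.25 * 2.0f32.powi(-14));
/// ```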
4891#[inline]
4892#[target_feature(enable = "avx512f")]
4893#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4894#[cfg_attr(test, assert_instr(vrcp14ps))]
4895pub fn _mm512_rcp14_ps(a: __m512) -> __m512 {
4896 unsafe { transmute(vrcp14ps(a.as_f32x16(), f32x16::ZERO, 0b11111111_11111111)) }
4897}
4898
4899/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4900///
4901/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rcp14_ps&expand=4500)
4902#[inline]
4903#[target_feature(enable = "avx512f")]
4904#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4905#[cfg_attr(test, assert_instr(vrcp14ps))]
4906pub fn _mm512_mask_rcp14_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
4907 unsafe { transmute(vrcp14ps(a.as_f32x16(), src.as_f32x16(), k)) }
4908}
4909
4910/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4911///
4912/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rcp14_ps&expand=4501)
4913#[inline]
4914#[target_feature(enable = "avx512f")]
4915#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4916#[cfg_attr(test, assert_instr(vrcp14ps))]
4917pub fn _mm512_maskz_rcp14_ps(k: __mmask16, a: __m512) -> __m512 {
4918 unsafe { transmute(vrcp14ps(a.as_f32x16(), f32x16::ZERO, k)) }
4919}
4920
4921/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
4922///
4923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rcp14_ps&expand=4499)
4924#[inline]
4925#[target_feature(enable = "avx512f,avx512vl")]
4926#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4927#[cfg_attr(test, assert_instr(vrcp14ps))]
4928pub fn _mm256_rcp14_ps(a: __m256) -> __m256 {
4929 unsafe { transmute(vrcp14ps256(a.as_f32x8(), f32x8::ZERO, 0b11111111)) }
4930}
4931
4932/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4933///
4934/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rcp14_ps&expand=4497)
4935#[inline]
4936#[target_feature(enable = "avx512f,avx512vl")]
4937#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4938#[cfg_attr(test, assert_instr(vrcp14ps))]
4939pub fn _mm256_mask_rcp14_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
4940 unsafe { transmute(vrcp14ps256(a.as_f32x8(), src.as_f32x8(), k)) }
4941}
4942
4943/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4944///
4945/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rcp14_ps&expand=4498)
4946#[inline]
4947#[target_feature(enable = "avx512f,avx512vl")]
4948#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4949#[cfg_attr(test, assert_instr(vrcp14ps))]
4950pub fn _mm256_maskz_rcp14_ps(k: __mmask8, a: __m256) -> __m256 {
4951 unsafe { transmute(vrcp14ps256(a.as_f32x8(), f32x8::ZERO, k)) }
4952}
4953
4954/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
4955///
4956/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rcp14_ps&expand=4496)
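///
/// # Example
///
/// An illustrative sketch (marked `ignore`): because this 128-bit form needs both
/// `avx512f` and `avx512vl`, a caller would typically gate it on runtime detection
/// of both features before using it.
///
/// ```ignore
/// if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512vl") {
///     // SAFETY: the required CPU features were just detected at runtime.
///     let r = unsafe { _mm_rcp14_ps(_mm_set1_ps(8.0)) };
///     // Each of the four lanes approximates 1.0 / 8.0 = 0.125.
/// }
/// ```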
4957#[inline]
4958#[target_feature(enable = "avx512f,avx512vl")]
4959#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4960#[cfg_attr(test, assert_instr(vrcp14ps))]
4961pub fn _mm_rcp14_ps(a: __m128) -> __m128 {
4962 unsafe { transmute(vrcp14ps128(a.as_f32x4(), f32x4::ZERO, 0b00001111)) }
4963}
4964
4965/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4966///
4967/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rcp14_ps&expand=4494)
4968#[inline]
4969#[target_feature(enable = "avx512f,avx512vl")]
4970#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4971#[cfg_attr(test, assert_instr(vrcp14ps))]
4972pub fn _mm_mask_rcp14_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
4973 unsafe { transmute(vrcp14ps128(a.as_f32x4(), src.as_f32x4(), k)) }
4974}
4975
4976/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4977///
4978/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rcp14_ps&expand=4495)
4979#[inline]
4980#[target_feature(enable = "avx512f,avx512vl")]
4981#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4982#[cfg_attr(test, assert_instr(vrcp14ps))]
4983pub fn _mm_maskz_rcp14_ps(k: __mmask8, a: __m128) -> __m128 {
4984 unsafe { transmute(vrcp14ps128(a.as_f32x4(), f32x4::ZERO, k)) }
4985}
4986
4987/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
4988///
4989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rcp14_pd&expand=4493)
4990#[inline]
4991#[target_feature(enable = "avx512f")]
4992#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4993#[cfg_attr(test, assert_instr(vrcp14pd))]
4994pub fn _mm512_rcp14_pd(a: __m512d) -> __m512d {
4995 unsafe { transmute(vrcp14pd(a.as_f64x8(), f64x8::ZERO, 0b11111111)) }
4996}
4997
4998/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4999///
5000/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rcp14_pd&expand=4491)
5001#[inline]
5002#[target_feature(enable = "avx512f")]
5003#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5004#[cfg_attr(test, assert_instr(vrcp14pd))]
5005pub fn _mm512_mask_rcp14_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
5006 unsafe { transmute(vrcp14pd(a.as_f64x8(), src.as_f64x8(), k)) }
5007}
5008
5009/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5010///
5011/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rcp14_pd&expand=4492)
5012#[inline]
5013#[target_feature(enable = "avx512f")]
5014#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5015#[cfg_attr(test, assert_instr(vrcp14pd))]
5016pub fn _mm512_maskz_rcp14_pd(k: __mmask8, a: __m512d) -> __m512d {
5017 unsafe { transmute(vrcp14pd(a.as_f64x8(), f64x8::ZERO, k)) }
5018}
5019
5020/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5021///
5022/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rcp14_pd&expand=4490)
5023#[inline]
5024#[target_feature(enable = "avx512f,avx512vl")]
5025#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5026#[cfg_attr(test, assert_instr(vrcp14pd))]
5027pub fn _mm256_rcp14_pd(a: __m256d) -> __m256d {
5028 unsafe { transmute(vrcp14pd256(a.as_f64x4(), f64x4::ZERO, 0b00001111)) }
5029}
5030
5031/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5032///
5033/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rcp14_pd&expand=4488)
5034#[inline]
5035#[target_feature(enable = "avx512f,avx512vl")]
5036#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5037#[cfg_attr(test, assert_instr(vrcp14pd))]
5038pub fn _mm256_mask_rcp14_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
5039 unsafe { transmute(vrcp14pd256(a.as_f64x4(), src.as_f64x4(), k)) }
5040}
5041
5042/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5043///
5044/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rcp14_pd&expand=4489)
5045#[inline]
5046#[target_feature(enable = "avx512f,avx512vl")]
5047#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5048#[cfg_attr(test, assert_instr(vrcp14pd))]
5049pub fn _mm256_maskz_rcp14_pd(k: __mmask8, a: __m256d) -> __m256d {
5050 unsafe { transmute(vrcp14pd256(a.as_f64x4(), f64x4::ZERO, k)) }
5051}
5052
5053/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5054///
5055/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rcp14_pd&expand=4487)
5056#[inline]
5057#[target_feature(enable = "avx512f,avx512vl")]
5058#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5059#[cfg_attr(test, assert_instr(vrcp14pd))]
5060pub fn _mm_rcp14_pd(a: __m128d) -> __m128d {
5061 unsafe { transmute(vrcp14pd128(a.as_f64x2(), f64x2::ZERO, 0b00000011)) }
5062}
5063
5064/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5065///
5066/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rcp14_pd&expand=4485)
5067#[inline]
5068#[target_feature(enable = "avx512f,avx512vl")]
5069#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5070#[cfg_attr(test, assert_instr(vrcp14pd))]
5071pub fn _mm_mask_rcp14_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
5072 unsafe { transmute(vrcp14pd128(a.as_f64x2(), src.as_f64x2(), k)) }
5073}
5074
5075/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5076///
5077/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rcp14_pd&expand=4486)
5078#[inline]
5079#[target_feature(enable = "avx512f,avx512vl")]
5080#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5081#[cfg_attr(test, assert_instr(vrcp14pd))]
5082pub fn _mm_maskz_rcp14_pd(k: __mmask8, a: __m128d) -> __m128d {
5083 unsafe { transmute(vrcp14pd128(a.as_f64x2(), f64x2::ZERO, k)) }
5084}
5085
5086/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5087///
5088/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rsqrt14_ps&expand=4819)
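///
/// # Example
///
/// A minimal sketch (assumes `avx512f`; marked `ignore`): each lane approximates
/// `1.0 / sqrt(x)` to within a relative error of 2^-14.
///
/// ```ignore
/// let a = _mm512_set1_ps(16.0);
/// // Each lane approximates 1.0 / sqrt(16.0) = 0.25.
/// let r = _mm512_rsqrt14_ps(a);
/// ```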
5089#[inline]
5090#[target_feature(enable = "avx512f")]
5091#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5092#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5093pub fn _mm512_rsqrt14_ps(a: __m512) -> __m512 {
5094 unsafe { transmute(vrsqrt14ps(a.as_f32x16(), f32x16::ZERO, 0b11111111_11111111)) }
5095}
5096
5097/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5098///
5099/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rsqrt14_ps&expand=4817)
5100#[inline]
5101#[target_feature(enable = "avx512f")]
5102#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5103#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5104pub fn _mm512_mask_rsqrt14_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
5105 unsafe { transmute(vrsqrt14ps(a.as_f32x16(), src.as_f32x16(), k)) }
5106}
5107
5108/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5109///
5110/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rsqrt14_ps&expand=4818)
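///
/// # Example
///
/// An illustrative sketch (assumes `avx512f`; marked `ignore`) of the zeromask
/// behaviour: lanes whose mask bit is clear are forced to zero instead of being
/// copied from a source vector.
///
/// ```ignore
/// let a = _mm512_set1_ps(4.0);
/// // Lanes 0..8 approximate 1.0 / sqrt(4.0) = 0.5; lanes 8..16 are zeroed.
/// let r = _mm512_maskz_rsqrt14_ps(0b00000000_11111111, a);
/// ```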
5111#[inline]
5112#[target_feature(enable = "avx512f")]
5113#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5114#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5115pub fn _mm512_maskz_rsqrt14_ps(k: __mmask16, a: __m512) -> __m512 {
5116 unsafe { transmute(vrsqrt14ps(a.as_f32x16(), f32x16::ZERO, k)) }
5117}
5118
5119/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5120///
5121/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rsqrt14_ps)
5122#[inline]
5123#[target_feature(enable = "avx512f,avx512vl")]
5124#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5125#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5126pub fn _mm256_rsqrt14_ps(a: __m256) -> __m256 {
5127 unsafe { transmute(vrsqrt14ps256(a.as_f32x8(), f32x8::ZERO, 0b11111111)) }
5128}
5129
5130/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5131///
5132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rsqrt14_ps&expand=4815)
5133#[inline]
5134#[target_feature(enable = "avx512f,avx512vl")]
5135#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5136#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5137pub fn _mm256_mask_rsqrt14_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
5138 unsafe { transmute(vrsqrt14ps256(a.as_f32x8(), src.as_f32x8(), k)) }
5139}
5140
5141/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5142///
5143/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rsqrt14_ps&expand=4816)
5144#[inline]
5145#[target_feature(enable = "avx512f,avx512vl")]
5146#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5147#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5148pub fn _mm256_maskz_rsqrt14_ps(k: __mmask8, a: __m256) -> __m256 {
5149 unsafe { transmute(vrsqrt14ps256(a.as_f32x8(), f32x8::ZERO, k)) }
5150}
5151
5152/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5153///
5154/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rsqrt14_ps)
5155#[inline]
5156#[target_feature(enable = "avx512f,avx512vl")]
5157#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5158#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5159pub fn _mm_rsqrt14_ps(a: __m128) -> __m128 {
5160 unsafe { transmute(vrsqrt14ps128(a.as_f32x4(), f32x4::ZERO, 0b00001111)) }
5161}
5162
5163/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5164///
5165/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rsqrt14_ps&expand=4813)
5166#[inline]
5167#[target_feature(enable = "avx512f,avx512vl")]
5168#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5169#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5170pub fn _mm_mask_rsqrt14_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
5171 unsafe { transmute(vrsqrt14ps128(a.as_f32x4(), src.as_f32x4(), k)) }
5172}
5173
5174/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5175///
5176/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rsqrt14_ps&expand=4814)
5177#[inline]
5178#[target_feature(enable = "avx512f,avx512vl")]
5179#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5180#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5181pub fn _mm_maskz_rsqrt14_ps(k: __mmask8, a: __m128) -> __m128 {
5182 unsafe { transmute(vrsqrt14ps128(a.as_f32x4(), f32x4::ZERO, k)) }
5183}
5184
5185/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5186///
5187/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rsqrt14_pd&expand=4812)
5188#[inline]
5189#[target_feature(enable = "avx512f")]
5190#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5191#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5192pub fn _mm512_rsqrt14_pd(a: __m512d) -> __m512d {
5193 unsafe { transmute(vrsqrt14pd(a.as_f64x8(), f64x8::ZERO, 0b11111111)) }
5194}
5195
5196/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5197///
5198/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rsqrt14_pd&expand=4810)
5199#[inline]
5200#[target_feature(enable = "avx512f")]
5201#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5202#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5203pub fn _mm512_mask_rsqrt14_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
5204 unsafe { transmute(vrsqrt14pd(a.as_f64x8(), src.as_f64x8(), k)) }
5205}
5206
5207/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5208///
5209/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rsqrt14_pd&expand=4811)
5210#[inline]
5211#[target_feature(enable = "avx512f")]
5212#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5213#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5214pub fn _mm512_maskz_rsqrt14_pd(k: __mmask8, a: __m512d) -> __m512d {
5215 unsafe { transmute(vrsqrt14pd(a.as_f64x8(), f64x8::ZERO, k)) }
5216}
5217
5218/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5219///
5220/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rsqrt14_pd)
5221#[inline]
5222#[target_feature(enable = "avx512f,avx512vl")]
5223#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5224#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5225pub fn _mm256_rsqrt14_pd(a: __m256d) -> __m256d {
5226 unsafe { transmute(vrsqrt14pd256(a.as_f64x4(), f64x4::ZERO, 0b00001111)) }
5227}
5228
5229/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5230///
5231/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rsqrt14_pd&expand=4808)
5232#[inline]
5233#[target_feature(enable = "avx512f,avx512vl")]
5234#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5235#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5236pub fn _mm256_mask_rsqrt14_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
5237 unsafe { transmute(vrsqrt14pd256(a.as_f64x4(), src.as_f64x4(), k)) }
5238}
5239
5240/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5241///
5242/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rsqrt14_pd&expand=4809)
5243#[inline]
5244#[target_feature(enable = "avx512f,avx512vl")]
5245#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5246#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5247pub fn _mm256_maskz_rsqrt14_pd(k: __mmask8, a: __m256d) -> __m256d {
5248 unsafe { transmute(vrsqrt14pd256(a.as_f64x4(), f64x4::ZERO, k)) }
5249}
5250
5251/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5252///
5253/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rsqrt14_pd)
5254#[inline]
5255#[target_feature(enable = "avx512f,avx512vl")]
5256#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5257#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5258pub fn _mm_rsqrt14_pd(a: __m128d) -> __m128d {
5259 unsafe { transmute(vrsqrt14pd128(a.as_f64x2(), f64x2::ZERO, 0b00000011)) }
5260}
5261
5262/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5263///
5264/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rsqrt14_pd&expand=4806)
5265#[inline]
5266#[target_feature(enable = "avx512f,avx512vl")]
5267#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5268#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5269pub fn _mm_mask_rsqrt14_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
5270 unsafe { transmute(vrsqrt14pd128(a.as_f64x2(), src.as_f64x2(), k)) }
5271}
5272
5273/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5274///
5275/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rsqrt14_pd&expand=4807)
5276#[inline]
5277#[target_feature(enable = "avx512f,avx512vl")]
5278#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5279#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5280pub fn _mm_maskz_rsqrt14_pd(k: __mmask8, a: __m128d) -> __m128d {
5281 unsafe { transmute(vrsqrt14pd128(a.as_f64x2(), f64x2::ZERO, k)) }
5282}
5283
5284/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5285///
5286/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_ps&expand=2844)
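///
/// # Example
///
/// A minimal sketch (assumes `avx512f`; marked `ignore`): the result is the
/// floating-point representation of `floor(log2(x))` for each lane.
///
/// ```ignore
/// let a = _mm512_set1_ps(10.0);
/// // floor(log2(10.0)) = 3, so every lane of `r` holds 3.0.
/// let r = _mm512_getexp_ps(a);
/// ```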
5287#[inline]
5288#[target_feature(enable = "avx512f")]
5289#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5290#[cfg_attr(test, assert_instr(vgetexpps))]
5291pub fn _mm512_getexp_ps(a: __m512) -> __m512 {
5292 unsafe {
5293 transmute(vgetexpps(
5294 a.as_f32x16(),
5295 f32x16::ZERO,
5296 0b11111111_11111111,
5297 _MM_FROUND_CUR_DIRECTION,
5298 ))
5299 }
5300}
5301
5302/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5303///
5304/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_ps&expand=2845)
5305#[inline]
5306#[target_feature(enable = "avx512f")]
5307#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5308#[cfg_attr(test, assert_instr(vgetexpps))]
5309pub fn _mm512_mask_getexp_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
5310 unsafe {
5311 transmute(vgetexpps(
5312 a.as_f32x16(),
5313 src.as_f32x16(),
5314 k,
5315 _MM_FROUND_CUR_DIRECTION,
5316 ))
5317 }
5318}
5319
5320/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5321///
5322/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_ps&expand=2846)
5323#[inline]
5324#[target_feature(enable = "avx512f")]
5325#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5326#[cfg_attr(test, assert_instr(vgetexpps))]
5327pub fn _mm512_maskz_getexp_ps(k: __mmask16, a: __m512) -> __m512 {
5328 unsafe {
5329 transmute(vgetexpps(
5330 a.as_f32x16(),
5331 f32x16::ZERO,
5332 k,
5333 _MM_FROUND_CUR_DIRECTION,
5334 ))
5335 }
5336}
5337
5338/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5339///
5340/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getexp_ps&expand=2841)
5341#[inline]
5342#[target_feature(enable = "avx512f,avx512vl")]
5343#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5344#[cfg_attr(test, assert_instr(vgetexpps))]
5345pub fn _mm256_getexp_ps(a: __m256) -> __m256 {
5346 unsafe { transmute(vgetexpps256(a.as_f32x8(), f32x8::ZERO, 0b11111111)) }
5347}
5348
5349/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5350///
5351/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getexp_ps&expand=2842)
5352#[inline]
5353#[target_feature(enable = "avx512f,avx512vl")]
5354#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5355#[cfg_attr(test, assert_instr(vgetexpps))]
5356pub fn _mm256_mask_getexp_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
5357 unsafe { transmute(vgetexpps256(a.as_f32x8(), src.as_f32x8(), k)) }
5358}
5359
5360/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5361///
5362/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getexp_ps&expand=2843)
5363#[inline]
5364#[target_feature(enable = "avx512f,avx512vl")]
5365#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5366#[cfg_attr(test, assert_instr(vgetexpps))]
5367pub fn _mm256_maskz_getexp_ps(k: __mmask8, a: __m256) -> __m256 {
5368 unsafe { transmute(vgetexpps256(a.as_f32x8(), f32x8::ZERO, k)) }
5369}
5370
5371/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5372///
5373/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getexp_ps&expand=2838)
5374#[inline]
5375#[target_feature(enable = "avx512f,avx512vl")]
5376#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5377#[cfg_attr(test, assert_instr(vgetexpps))]
5378pub fn _mm_getexp_ps(a: __m128) -> __m128 {
5379 unsafe { transmute(vgetexpps128(a.as_f32x4(), f32x4::ZERO, 0b00001111)) }
5380}
5381
5382/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5383///
5384/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getexp_ps&expand=2839)
5385#[inline]
5386#[target_feature(enable = "avx512f,avx512vl")]
5387#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5388#[cfg_attr(test, assert_instr(vgetexpps))]
5389pub fn _mm_mask_getexp_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
5390 unsafe { transmute(vgetexpps128(a.as_f32x4(), src.as_f32x4(), k)) }
5391}
5392
5393/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5394///
5395/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getexp_ps&expand=2840)
5396#[inline]
5397#[target_feature(enable = "avx512f,avx512vl")]
5398#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5399#[cfg_attr(test, assert_instr(vgetexpps))]
5400pub fn _mm_maskz_getexp_ps(k: __mmask8, a: __m128) -> __m128 {
5401 unsafe { transmute(vgetexpps128(a.as_f32x4(), f32x4::ZERO, k)) }
5402}
5403
5404/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5405///
5406/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_pd&expand=2835)
5407#[inline]
5408#[target_feature(enable = "avx512f")]
5409#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5410#[cfg_attr(test, assert_instr(vgetexppd))]
5411pub fn _mm512_getexp_pd(a: __m512d) -> __m512d {
5412 unsafe {
5413 transmute(vgetexppd(
5414 a.as_f64x8(),
5415 f64x8::ZERO,
5416 0b11111111,
5417 _MM_FROUND_CUR_DIRECTION,
5418 ))
5419 }
5420}
5421
5422/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5423///
5424/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_pd&expand=2836)
5425#[inline]
5426#[target_feature(enable = "avx512f")]
5427#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5428#[cfg_attr(test, assert_instr(vgetexppd))]
5429pub fn _mm512_mask_getexp_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
5430 unsafe {
5431 transmute(vgetexppd(
5432 a.as_f64x8(),
5433 src.as_f64x8(),
5434 k,
5435 _MM_FROUND_CUR_DIRECTION,
5436 ))
5437 }
5438}
5439
5440/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5441///
5442/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_pd&expand=2837)
5443#[inline]
5444#[target_feature(enable = "avx512f")]
5445#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5446#[cfg_attr(test, assert_instr(vgetexppd))]
5447pub fn _mm512_maskz_getexp_pd(k: __mmask8, a: __m512d) -> __m512d {
5448 unsafe {
5449 transmute(vgetexppd(
5450 a.as_f64x8(),
5451 f64x8::ZERO,
5452 k,
5453 _MM_FROUND_CUR_DIRECTION,
5454 ))
5455 }
5456}
5457
5458/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5459///
5460/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getexp_pd&expand=2832)
5461#[inline]
5462#[target_feature(enable = "avx512f,avx512vl")]
5463#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5464#[cfg_attr(test, assert_instr(vgetexppd))]
5465pub fn _mm256_getexp_pd(a: __m256d) -> __m256d {
5466 unsafe { transmute(vgetexppd256(a.as_f64x4(), f64x4::ZERO, 0b00001111)) }
5467}
5468
5469/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5470///
5471/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getexp_pd&expand=2833)
5472#[inline]
5473#[target_feature(enable = "avx512f,avx512vl")]
5474#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5475#[cfg_attr(test, assert_instr(vgetexppd))]
5476pub fn _mm256_mask_getexp_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
5477 unsafe { transmute(vgetexppd256(a.as_f64x4(), src.as_f64x4(), k)) }
5478}
5479
5480/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5481///
5482/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getexp_pd&expand=2834)
5483#[inline]
5484#[target_feature(enable = "avx512f,avx512vl")]
5485#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5486#[cfg_attr(test, assert_instr(vgetexppd))]
5487pub fn _mm256_maskz_getexp_pd(k: __mmask8, a: __m256d) -> __m256d {
5488 unsafe { transmute(vgetexppd256(a.as_f64x4(), f64x4::ZERO, k)) }
5489}
5490
5491/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5492///
5493/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getexp_pd&expand=2829)
5494#[inline]
5495#[target_feature(enable = "avx512f,avx512vl")]
5496#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5497#[cfg_attr(test, assert_instr(vgetexppd))]
5498pub fn _mm_getexp_pd(a: __m128d) -> __m128d {
5499 unsafe { transmute(vgetexppd128(a.as_f64x2(), f64x2::ZERO, 0b00000011)) }
5500}
5501
5502/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5503///
5504/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getexp_pd&expand=2830)
5505#[inline]
5506#[target_feature(enable = "avx512f,avx512vl")]
5507#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5508#[cfg_attr(test, assert_instr(vgetexppd))]
5509pub fn _mm_mask_getexp_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
5510 unsafe { transmute(vgetexppd128(a.as_f64x2(), src.as_f64x2(), k)) }
5511}
5512
5513/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5514///
5515/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getexp_pd&expand=2831)
5516#[inline]
5517#[target_feature(enable = "avx512f,avx512vl")]
5518#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5519#[cfg_attr(test, assert_instr(vgetexppd))]
5520pub fn _mm_maskz_getexp_pd(k: __mmask8, a: __m128d) -> __m128d {
5521 unsafe { transmute(vgetexppd128(a.as_f64x2(), f64x2::ZERO, k)) }
5522}
5523
5524/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5525/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5526/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5527/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5528/// * [`_MM_FROUND_TO_POS_INF`] : round up
5529/// * [`_MM_FROUND_TO_ZERO`] : truncate
5530/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5531///
5532/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_ps&expand=4784)
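///
/// # Example
///
/// An illustrative sketch (assumes `avx512f`; marked `ignore`): `IMM8 = 0` selects
/// zero fraction bits and round-to-nearest, i.e. ordinary rounding to the nearest
/// integer. Bits 7:4 of `IMM8` select how many fraction bits to keep, so
/// `IMM8 = 0x10` would instead round to the nearest multiple of 0.5.
///
/// ```ignore
/// let a = _mm512_set1_ps(2.7);
/// // Every lane of `r` holds 3.0.
/// let r = _mm512_roundscale_ps::<0>(a);
/// ```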
5533#[inline]
5534#[target_feature(enable = "avx512f")]
5535#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5536#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5537#[rustc_legacy_const_generics(1)]
5538pub fn _mm512_roundscale_ps<const IMM8: i32>(a: __m512) -> __m512 {
5539 unsafe {
5540 static_assert_uimm_bits!(IMM8, 8);
5541 let a: f32x16 = a.as_f32x16();
5542 let r: f32x16 = vrndscaleps(
5543 a,
5544 IMM8,
5545 f32x16::ZERO,
5546 0b11111111_11111111,
5547 _MM_FROUND_CUR_DIRECTION,
5548 );
5549 transmute(r)
5550 }
5551}
5552
5553/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5554/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5555/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5556/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5557/// * [`_MM_FROUND_TO_POS_INF`] : round up
5558/// * [`_MM_FROUND_TO_ZERO`] : truncate
5559/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5560///
5561/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_ps&expand=4782)
5562#[inline]
5563#[target_feature(enable = "avx512f")]
5564#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5565#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5566#[rustc_legacy_const_generics(3)]
5567pub fn _mm512_mask_roundscale_ps<const IMM8: i32>(src: __m512, k: __mmask16, a: __m512) -> __m512 {
5568 unsafe {
5569 static_assert_uimm_bits!(IMM8, 8);
5570 let a: f32x16 = a.as_f32x16();
5571 let src: f32x16 = src.as_f32x16();
5572 let r: f32x16 = vrndscaleps(a, IMM8, src, k, _MM_FROUND_CUR_DIRECTION);
5573 transmute(r)
5574 }
5575}
5576
5577/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5578/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5579/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5580/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5581/// * [`_MM_FROUND_TO_POS_INF`] : round up
5582/// * [`_MM_FROUND_TO_ZERO`] : truncate
5583/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5584///
5585/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_ps&expand=4783)
5586#[inline]
5587#[target_feature(enable = "avx512f")]
5588#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5589#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5590#[rustc_legacy_const_generics(2)]
5591pub fn _mm512_maskz_roundscale_ps<const IMM8: i32>(k: __mmask16, a: __m512) -> __m512 {
5592 unsafe {
5593 static_assert_uimm_bits!(IMM8, 8);
5594 let a: f32x16 = a.as_f32x16();
5595 let r: f32x16 = vrndscaleps(a, IMM8, f32x16::ZERO, k, _MM_FROUND_CUR_DIRECTION);
5596 transmute(r)
5597 }
5598}
5599
5600/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5601/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5602/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5603/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5604/// * [`_MM_FROUND_TO_POS_INF`] : round up
5605/// * [`_MM_FROUND_TO_ZERO`] : truncate
5606/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5607///
5608/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_roundscale_ps&expand=4781)
5609#[inline]
5610#[target_feature(enable = "avx512f,avx512vl")]
5611#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5612#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 250))]
5613#[rustc_legacy_const_generics(1)]
5614pub fn _mm256_roundscale_ps<const IMM8: i32>(a: __m256) -> __m256 {
5615 unsafe {
5616 static_assert_uimm_bits!(IMM8, 8);
5617 let a: f32x8 = a.as_f32x8();
5618 let r: f32x8 = vrndscaleps256(a, IMM8, f32x8::ZERO, 0b11111111);
5619 transmute(r)
5620 }
5621}
5622
5623/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5624/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5625/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5626/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5627/// * [`_MM_FROUND_TO_POS_INF`] : round up
5628/// * [`_MM_FROUND_TO_ZERO`] : truncate
5629/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5630///
5631/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_roundscale_ps&expand=4779)
5632#[inline]
5633#[target_feature(enable = "avx512f,avx512vl")]
5634#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5635#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5636#[rustc_legacy_const_generics(3)]
5637pub fn _mm256_mask_roundscale_ps<const IMM8: i32>(src: __m256, k: __mmask8, a: __m256) -> __m256 {
5638 unsafe {
5639 static_assert_uimm_bits!(IMM8, 8);
5640 let a: f32x8 = a.as_f32x8();
5641 let src: f32x8 = src.as_f32x8();
5642 let r: f32x8 = vrndscaleps256(a, IMM8, src, k);
5643 transmute(r)
5644 }
5645}
5646
5647/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5648/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5649/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5650/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5651/// * [`_MM_FROUND_TO_POS_INF`] : round up
5652/// * [`_MM_FROUND_TO_ZERO`] : truncate
5653/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5654///
5655/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_roundscale_ps&expand=4780)
5656#[inline]
5657#[target_feature(enable = "avx512f,avx512vl")]
5658#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5659#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5660#[rustc_legacy_const_generics(2)]
5661pub fn _mm256_maskz_roundscale_ps<const IMM8: i32>(k: __mmask8, a: __m256) -> __m256 {
5662 unsafe {
5663 static_assert_uimm_bits!(IMM8, 8);
5664 let a: f32x8 = a.as_f32x8();
5665 let r: f32x8 = vrndscaleps256(a, IMM8, f32x8::ZERO, k);
5666 transmute(r)
5667 }
5668}
5669
5670/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5671/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5672/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5673/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5674/// * [`_MM_FROUND_TO_POS_INF`] : round up
5675/// * [`_MM_FROUND_TO_ZERO`] : truncate
5676/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5677///
5678/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_roundscale_ps&expand=4778)
5679#[inline]
5680#[target_feature(enable = "avx512f,avx512vl")]
5681#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5682#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 250))]
5683#[rustc_legacy_const_generics(1)]
5684pub fn _mm_roundscale_ps<const IMM8: i32>(a: __m128) -> __m128 {
5685 unsafe {
5686 static_assert_uimm_bits!(IMM8, 8);
5687 let a: f32x4 = a.as_f32x4();
5688 let r: f32x4 = vrndscaleps128(a, IMM8, f32x4::ZERO, 0b00001111);
5689 transmute(r)
5690 }
5691}
5692
5693/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5694/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5695/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5696/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5697/// * [`_MM_FROUND_TO_POS_INF`] : round up
5698/// * [`_MM_FROUND_TO_ZERO`] : truncate
5699/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5700///
5701/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_roundscale_ps&expand=4776)
5702#[inline]
5703#[target_feature(enable = "avx512f,avx512vl")]
5704#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5705#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5706#[rustc_legacy_const_generics(3)]
5707pub fn _mm_mask_roundscale_ps<const IMM8: i32>(src: __m128, k: __mmask8, a: __m128) -> __m128 {
5708 unsafe {
5709 static_assert_uimm_bits!(IMM8, 8);
5710 let a: f32x4 = a.as_f32x4();
5711 let src: f32x4 = src.as_f32x4();
5712 let r: f32x4 = vrndscaleps128(a, IMM8, src, k);
5713 transmute(r)
5714 }
5715}
5716
5717/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5718/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5719/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5720/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5721/// * [`_MM_FROUND_TO_POS_INF`] : round up
5722/// * [`_MM_FROUND_TO_ZERO`] : truncate
5723/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5724///
5725/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_roundscale_ps&expand=4777)
5726#[inline]
5727#[target_feature(enable = "avx512f,avx512vl")]
5728#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5729#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5730#[rustc_legacy_const_generics(2)]
5731pub fn _mm_maskz_roundscale_ps<const IMM8: i32>(k: __mmask8, a: __m128) -> __m128 {
5732 unsafe {
5733 static_assert_uimm_bits!(IMM8, 8);
5734 let a: f32x4 = a.as_f32x4();
5735 let r: f32x4 = vrndscaleps128(a, IMM8, f32x4::ZERO, k);
5736 transmute(r)
5737 }
5738}
5739
5740/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5741/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5742/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5743/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5744/// * [`_MM_FROUND_TO_POS_INF`] : round up
5745/// * [`_MM_FROUND_TO_ZERO`] : truncate
5746/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5747///
5748/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_pd&expand=4775)
5749#[inline]
5750#[target_feature(enable = "avx512f")]
5751#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5752#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5753#[rustc_legacy_const_generics(1)]
5754pub fn _mm512_roundscale_pd<const IMM8: i32>(a: __m512d) -> __m512d {
5755 unsafe {
5756 static_assert_uimm_bits!(IMM8, 8);
5757 let a: f64x8 = a.as_f64x8();
5758 let r: f64x8 = vrndscalepd(a, IMM8, f64x8::ZERO, 0b11111111, _MM_FROUND_CUR_DIRECTION);
5759 transmute(r)
5760 }
5761}
5762
5763/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5764/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5765/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5766/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5767/// * [`_MM_FROUND_TO_POS_INF`] : round up
5768/// * [`_MM_FROUND_TO_ZERO`] : truncate
5769/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5770///
5771/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_pd&expand=4773)
5772#[inline]
5773#[target_feature(enable = "avx512f")]
5774#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5775#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5776#[rustc_legacy_const_generics(3)]
5777pub fn _mm512_mask_roundscale_pd<const IMM8: i32>(
5778 src: __m512d,
5779 k: __mmask8,
5780 a: __m512d,
5781) -> __m512d {
5782 unsafe {
5783 static_assert_uimm_bits!(IMM8, 8);
5784 let a: f64x8 = a.as_f64x8();
5785 let src: f64x8 = src.as_f64x8();
5786 let r: f64x8 = vrndscalepd(a, IMM8, src, k, _MM_FROUND_CUR_DIRECTION);
5787 transmute(r)
5788 }
5789}
5790
5791/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5792/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5793/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5794/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5795/// * [`_MM_FROUND_TO_POS_INF`] : round up
5796/// * [`_MM_FROUND_TO_ZERO`] : truncate
5797/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5798///
5799/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_pd&expand=4774)
5800#[inline]
5801#[target_feature(enable = "avx512f")]
5802#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5803#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5804#[rustc_legacy_const_generics(2)]
5805pub fn _mm512_maskz_roundscale_pd<const IMM8: i32>(k: __mmask8, a: __m512d) -> __m512d {
5806 unsafe {
5807 static_assert_uimm_bits!(IMM8, 8);
5808 let a: f64x8 = a.as_f64x8();
5809 let r: f64x8 = vrndscalepd(a, IMM8, f64x8::ZERO, k, _MM_FROUND_CUR_DIRECTION);
5810 transmute(r)
5811 }
5812}
5813
5814/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5815/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5816/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5817/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5818/// * [`_MM_FROUND_TO_POS_INF`] : round up
5819/// * [`_MM_FROUND_TO_ZERO`] : truncate
5820/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5821///
5822/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_roundscale_pd&expand=4772)
5823#[inline]
5824#[target_feature(enable = "avx512f,avx512vl")]
5825#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5826#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5827#[rustc_legacy_const_generics(1)]
5828pub fn _mm256_roundscale_pd<const IMM8: i32>(a: __m256d) -> __m256d {
5829 unsafe {
5830 static_assert_uimm_bits!(IMM8, 8);
5831 let a: f64x4 = a.as_f64x4();
5832 let r: f64x4 = vrndscalepd256(a, IMM8, f64x4::ZERO, 0b00001111);
5833 transmute(r)
5834 }
5835}
5836
5837/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5838/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5839/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5840/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5841/// * [`_MM_FROUND_TO_POS_INF`] : round up
5842/// * [`_MM_FROUND_TO_ZERO`] : truncate
5843/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5844///
5845/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_roundscale_pd&expand=4770)
5846#[inline]
5847#[target_feature(enable = "avx512f,avx512vl")]
5848#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5849#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5850#[rustc_legacy_const_generics(3)]
5851pub fn _mm256_mask_roundscale_pd<const IMM8: i32>(
5852 src: __m256d,
5853 k: __mmask8,
5854 a: __m256d,
5855) -> __m256d {
5856 unsafe {
5857 static_assert_uimm_bits!(IMM8, 8);
5858 let a: f64x4 = a.as_f64x4();
5859 let src: f64x4 = src.as_f64x4();
5860 let r: f64x4 = vrndscalepd256(a, IMM8, src, k);
5861 transmute(r)
5862 }
5863}
5864
5865/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5866/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5867/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5868/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5869/// * [`_MM_FROUND_TO_POS_INF`] : round up
5870/// * [`_MM_FROUND_TO_ZERO`] : truncate
5871/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5872///
5873/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_roundscale_pd&expand=4771)
5874#[inline]
5875#[target_feature(enable = "avx512f,avx512vl")]
5876#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5877#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5878#[rustc_legacy_const_generics(2)]
5879pub fn _mm256_maskz_roundscale_pd<const IMM8: i32>(k: __mmask8, a: __m256d) -> __m256d {
5880 unsafe {
5881 static_assert_uimm_bits!(IMM8, 8);
5882 let a: f64x4 = a.as_f64x4();
5883 let r: f64x4 = vrndscalepd256(a, IMM8, f64x4::ZERO, k);
5884 transmute(r)
5885 }
5886}
5887
5888/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5889/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5890/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5891/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5892/// * [`_MM_FROUND_TO_POS_INF`] : round up
5893/// * [`_MM_FROUND_TO_ZERO`] : truncate
5894/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5895///
5896/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_roundscale_pd&expand=4769)
5897#[inline]
5898#[target_feature(enable = "avx512f,avx512vl")]
5899#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5900#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5901#[rustc_legacy_const_generics(1)]
5902pub fn _mm_roundscale_pd<const IMM8: i32>(a: __m128d) -> __m128d {
5903 unsafe {
5904 static_assert_uimm_bits!(IMM8, 8);
5905 let a: f64x2 = a.as_f64x2();
5906 let r: f64x2 = vrndscalepd128(a, IMM8, f64x2::ZERO, 0b00000011);
5907 transmute(r)
5908 }
5909}
5910
5911/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5912/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5913/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5914/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5915/// * [`_MM_FROUND_TO_POS_INF`] : round up
5916/// * [`_MM_FROUND_TO_ZERO`] : truncate
5917/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5918///
5919/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_roundscale_pd&expand=4767)
5920#[inline]
5921#[target_feature(enable = "avx512f,avx512vl")]
5922#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5923#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5924#[rustc_legacy_const_generics(3)]
5925pub fn _mm_mask_roundscale_pd<const IMM8: i32>(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
5926 unsafe {
5927 static_assert_uimm_bits!(IMM8, 8);
5928 let a: f64x2 = a.as_f64x2();
5929 let src: f64x2 = src.as_f64x2();
5930 let r: f64x2 = vrndscalepd128(a, IMM8, src, k);
5931 transmute(r)
5932 }
5933}
5934
5935/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5936/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5937/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5938/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5939/// * [`_MM_FROUND_TO_POS_INF`] : round up
5940/// * [`_MM_FROUND_TO_ZERO`] : truncate
5941/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5942///
5943/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_roundscale_pd&expand=4768)
5944#[inline]
5945#[target_feature(enable = "avx512f,avx512vl")]
5946#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5947#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5948#[rustc_legacy_const_generics(2)]
5949pub fn _mm_maskz_roundscale_pd<const IMM8: i32>(k: __mmask8, a: __m128d) -> __m128d {
5950 unsafe {
5951 static_assert_uimm_bits!(IMM8, 8);
5952 let a: f64x2 = a.as_f64x2();
5953 let r: f64x2 = vrndscalepd128(a, IMM8, f64x2::ZERO, k);
5954 transmute(r)
5955 }
5956}
5957
5958/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.
5959///
5960/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_ps&expand=4883)
5961#[inline]
5962#[target_feature(enable = "avx512f")]
5963#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5964#[cfg_attr(test, assert_instr(vscalefps))]
5965pub fn _mm512_scalef_ps(a: __m512, b: __m512) -> __m512 {
5966 unsafe {
5967 transmute(vscalefps(
5968 a.as_f32x16(),
5969 b.as_f32x16(),
5970 f32x16::ZERO,
5971 0b11111111_11111111,
5972 _MM_FROUND_CUR_DIRECTION,
5973 ))
5974 }
5975}
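
// A minimal sketch of the scalef semantics, assuming the usual reading of Intel's
// vscalefps description: each result lane is a[i] * 2^floor(b[i]), so the intrinsic
// applies a per-lane power-of-two scale without touching the mantissa. Multiplying
// every lane of `a` by 8.0 could, for example, look like
//
//     let scaled = _mm512_scalef_ps(a, _mm512_set1_ps(3.0));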
5976
5977/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5978///
5979/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_ps&expand=4881)
5980#[inline]
5981#[target_feature(enable = "avx512f")]
5982#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5983#[cfg_attr(test, assert_instr(vscalefps))]
5984pub fn _mm512_mask_scalef_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
5985 unsafe {
5986 transmute(vscalefps(
5987 a.as_f32x16(),
5988 b.as_f32x16(),
5989 src.as_f32x16(),
5990 k,
5991 _MM_FROUND_CUR_DIRECTION,
5992 ))
5993 }
5994}
5995
5996/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5997///
5998/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_ps&expand=4882)
5999#[inline]
6000#[target_feature(enable = "avx512f")]
6001#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6002#[cfg_attr(test, assert_instr(vscalefps))]
6003pub fn _mm512_maskz_scalef_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
6004 unsafe {
6005 transmute(vscalefps(
6006 a.as_f32x16(),
6007 b.as_f32x16(),
6008 f32x16::ZERO,
6009 k,
6010 _MM_FROUND_CUR_DIRECTION,
6011 ))
6012 }
6013}
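
// The mask/maskz pair above follows the usual AVX-512 convention: a set bit in `k`
// selects the computed lane, a clear bit falls back to `src` (writemask) or to zero
// (zeromask). A sketch with only the even lanes selected, purely for illustration:
//
//     let k: __mmask16 = 0b01010101_01010101;
//     let merged = _mm512_mask_scalef_ps(src, k, a, b);  // odd lanes copied from `src`
//     let zeroed = _mm512_maskz_scalef_ps(k, a, b);      // odd lanes forced to 0.0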
6014
6015/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.
6016///
6017/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_scalef_ps&expand=4880)
6018#[inline]
6019#[target_feature(enable = "avx512f,avx512vl")]
6020#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6021#[cfg_attr(test, assert_instr(vscalefps))]
6022pub fn _mm256_scalef_ps(a: __m256, b: __m256) -> __m256 {
6023 unsafe {
6024 transmute(vscalefps256(
6025 a.as_f32x8(),
6026 b.as_f32x8(),
6027 f32x8::ZERO,
6028 0b11111111,
6029 ))
6030 }
6031}
6032
6033/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6034///
6035/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_scalef_ps&expand=4878)
6036#[inline]
6037#[target_feature(enable = "avx512f,avx512vl")]
6038#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6039#[cfg_attr(test, assert_instr(vscalefps))]
6040pub fn _mm256_mask_scalef_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
6041 unsafe { transmute(vscalefps256(a.as_f32x8(), b.as_f32x8(), src.as_f32x8(), k)) }
6042}
6043
6044/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6045///
6046/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_scalef_ps&expand=4879)
6047#[inline]
6048#[target_feature(enable = "avx512f,avx512vl")]
6049#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6050#[cfg_attr(test, assert_instr(vscalefps))]
6051pub fn _mm256_maskz_scalef_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
6052 unsafe { transmute(vscalefps256(a.as_f32x8(), b.as_f32x8(), f32x8::ZERO, k)) }
6053}
6054
6055/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.
6056///
6057/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_scalef_ps&expand=4877)
6058#[inline]
6059#[target_feature(enable = "avx512f,avx512vl")]
6060#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6061#[cfg_attr(test, assert_instr(vscalefps))]
6062pub fn _mm_scalef_ps(a: __m128, b: __m128) -> __m128 {
6063 unsafe {
6064 transmute(vscalefps128(
6065 a.as_f32x4(),
6066 b.as_f32x4(),
6067 f32x4::ZERO,
6068 0b00001111,
6069 ))
6070 }
6071}
6072
6073/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6074///
6075/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_scalef_ps&expand=4875)
6076#[inline]
6077#[target_feature(enable = "avx512f,avx512vl")]
6078#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6079#[cfg_attr(test, assert_instr(vscalefps))]
6080pub fn _mm_mask_scalef_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
6081 unsafe { transmute(vscalefps128(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k)) }
6082}
6083
6084/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6085///
6086/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_scalef_ps&expand=4876)
6087#[inline]
6088#[target_feature(enable = "avx512f,avx512vl")]
6089#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6090#[cfg_attr(test, assert_instr(vscalefps))]
6091pub fn _mm_maskz_scalef_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
6092 unsafe { transmute(vscalefps128(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, k)) }
6093}
6094
6095/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.
6096///
6097/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_pd&expand=4874)
6098#[inline]
6099#[target_feature(enable = "avx512f")]
6100#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6101#[cfg_attr(test, assert_instr(vscalefpd))]
6102pub fn _mm512_scalef_pd(a: __m512d, b: __m512d) -> __m512d {
6103 unsafe {
6104 transmute(vscalefpd(
6105 a.as_f64x8(),
6106 b.as_f64x8(),
6107 f64x8::ZERO,
6108 0b11111111,
6109 _MM_FROUND_CUR_DIRECTION,
6110 ))
6111 }
6112}
6113
6114/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6115///
6116/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_pd&expand=4872)
6117#[inline]
6118#[target_feature(enable = "avx512f")]
6119#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6120#[cfg_attr(test, assert_instr(vscalefpd))]
6121pub fn _mm512_mask_scalef_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
6122 unsafe {
6123 transmute(vscalefpd(
6124 a.as_f64x8(),
6125 b.as_f64x8(),
6126 src.as_f64x8(),
6127 k,
6128 _MM_FROUND_CUR_DIRECTION,
6129 ))
6130 }
6131}
6132
6133/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6134///
6135/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_pd&expand=4873)
6136#[inline]
6137#[target_feature(enable = "avx512f")]
6138#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6139#[cfg_attr(test, assert_instr(vscalefpd))]
6140pub fn _mm512_maskz_scalef_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
6141 unsafe {
6142 transmute(vscalefpd(
6143 a.as_f64x8(),
6144 b.as_f64x8(),
6145 f64x8::ZERO,
6146 k,
6147 _MM_FROUND_CUR_DIRECTION,
6148 ))
6149 }
6150}
6151
6152/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.
6153///
6154/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_scalef_pd&expand=4871)
6155#[inline]
6156#[target_feature(enable = "avx512f,avx512vl")]
6157#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6158#[cfg_attr(test, assert_instr(vscalefpd))]
6159pub fn _mm256_scalef_pd(a: __m256d, b: __m256d) -> __m256d {
6160 unsafe {
6161 transmute(vscalefpd256(
6162 a.as_f64x4(),
6163 b.as_f64x4(),
6164 f64x4::ZERO,
6165 0b00001111,
6166 ))
6167 }
6168}
6169
6170/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6171///
6172/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_scalef_pd&expand=4869)
6173#[inline]
6174#[target_feature(enable = "avx512f,avx512vl")]
6175#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6176#[cfg_attr(test, assert_instr(vscalefpd))]
6177pub fn _mm256_mask_scalef_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
6178 unsafe { transmute(vscalefpd256(a.as_f64x4(), b.as_f64x4(), src.as_f64x4(), k)) }
6179}
6180
6181/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6182///
6183/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_scalef_pd&expand=4870)
6184#[inline]
6185#[target_feature(enable = "avx512f,avx512vl")]
6186#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6187#[cfg_attr(test, assert_instr(vscalefpd))]
6188pub fn _mm256_maskz_scalef_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
6189 unsafe { transmute(vscalefpd256(a.as_f64x4(), b.as_f64x4(), f64x4::ZERO, k)) }
6190}
6191
6192/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.
6193///
6194/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_scalef_pd&expand=4868)
6195#[inline]
6196#[target_feature(enable = "avx512f,avx512vl")]
6197#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6198#[cfg_attr(test, assert_instr(vscalefpd))]
6199pub fn _mm_scalef_pd(a: __m128d, b: __m128d) -> __m128d {
6200 unsafe {
6201 transmute(vscalefpd128(
6202 a.as_f64x2(),
6203 b.as_f64x2(),
6204 f64x2::ZERO,
6205 0b00000011,
6206 ))
6207 }
6208}
6209
6210/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6211///
6212/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_scalef_pd&expand=4866)
6213#[inline]
6214#[target_feature(enable = "avx512f,avx512vl")]
6215#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6216#[cfg_attr(test, assert_instr(vscalefpd))]
6217pub fn _mm_mask_scalef_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
6218 unsafe { transmute(vscalefpd128(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k)) }
6219}
6220
6221/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6222///
6223/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_scalef_pd&expand=4867)
6224#[inline]
6225#[target_feature(enable = "avx512f,avx512vl")]
6226#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6227#[cfg_attr(test, assert_instr(vscalefpd))]
6228pub fn _mm_maskz_scalef_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
6229 unsafe { transmute(vscalefpd128(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, k)) }
6230}
6231
6232/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6233///
6234/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_ps&expand=2499)
6235#[inline]
6236#[target_feature(enable = "avx512f")]
6237#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6238#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6239#[rustc_legacy_const_generics(3)]
6240pub fn _mm512_fixupimm_ps<const IMM8: i32>(a: __m512, b: __m512, c: __m512i) -> __m512 {
6241 unsafe {
6242 static_assert_uimm_bits!(IMM8, 8);
6243 let a: f32x16 = a.as_f32x16();
6244 let b: f32x16 = b.as_f32x16();
6245 let c: i32x16 = c.as_i32x16();
6246 let r: f32x16 = vfixupimmps(a, b, c, IMM8, 0b11111111_11111111, _MM_FROUND_CUR_DIRECTION);
6247 transmute(r)
6248 }
6249}
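
// A hedged sketch of how fixupimm is typically driven (the token/table layout is this
// comment's reading of the vfixupimmps description, not something these wrappers verify):
// each lane of `b` is classified into a token, that token indexes a 4-bit field inside the
// matching lane of the integer table `c`, and the selected field picks the value written to
// the result; IMM8 only chooses which special cases raise floating-point exceptions. Under
// that reading, an all-zero table leaves every lane of `a` unchanged:
//
//     let identity = _mm512_fixupimm_ps::<0>(a, b, _mm512_setzero_si512());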
6250
6251/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6252///
6253/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_ps&expand=2500)
6254#[inline]
6255#[target_feature(enable = "avx512f")]
6256#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6257#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6258#[rustc_legacy_const_generics(4)]
6259pub fn _mm512_mask_fixupimm_ps<const IMM8: i32>(
6260 a: __m512,
6261 k: __mmask16,
6262 b: __m512,
6263 c: __m512i,
6264) -> __m512 {
6265 unsafe {
6266 static_assert_uimm_bits!(IMM8, 8);
6267 let a: f32x16 = a.as_f32x16();
6268 let b: f32x16 = b.as_f32x16();
6269 let c: i32x16 = c.as_i32x16();
6270 let r: f32x16 = vfixupimmps(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
6271 transmute(r)
6272 }
6273}
6274
6275/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6276///
6277/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_ps&expand=2501)
6278#[inline]
6279#[target_feature(enable = "avx512f")]
6280#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6281#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6282#[rustc_legacy_const_generics(4)]
6283pub fn _mm512_maskz_fixupimm_ps<const IMM8: i32>(
6284 k: __mmask16,
6285 a: __m512,
6286 b: __m512,
6287 c: __m512i,
6288) -> __m512 {
6289 unsafe {
6290 static_assert_uimm_bits!(IMM8, 8);
6291 let a: f32x16 = a.as_f32x16();
6292 let b: f32x16 = b.as_f32x16();
6293 let c: i32x16 = c.as_i32x16();
6294 let r: f32x16 = vfixupimmpsz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
6295 transmute(r)
6296 }
6297}
6298
6299/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6300///
6301/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fixupimm_ps&expand=2496)
6302#[inline]
6303#[target_feature(enable = "avx512f,avx512vl")]
6304#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6305#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6306#[rustc_legacy_const_generics(3)]
6307pub fn _mm256_fixupimm_ps<const IMM8: i32>(a: __m256, b: __m256, c: __m256i) -> __m256 {
6308 unsafe {
6309 static_assert_uimm_bits!(IMM8, 8);
6310 let a: f32x8 = a.as_f32x8();
6311 let b: f32x8 = b.as_f32x8();
6312 let c: i32x8 = c.as_i32x8();
6313 let r: f32x8 = vfixupimmps256(a, b, c, IMM8, 0b11111111);
6314 transmute(r)
6315 }
6316}
6317
6318/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6319///
6320/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fixupimm_ps&expand=2497)
6321#[inline]
6322#[target_feature(enable = "avx512f,avx512vl")]
6323#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6324#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6325#[rustc_legacy_const_generics(4)]
6326pub fn _mm256_mask_fixupimm_ps<const IMM8: i32>(
6327 a: __m256,
6328 k: __mmask8,
6329 b: __m256,
6330 c: __m256i,
6331) -> __m256 {
6332 unsafe {
6333 static_assert_uimm_bits!(IMM8, 8);
6334 let a: f32x8 = a.as_f32x8();
6335 let b: f32x8 = b.as_f32x8();
6336 let c: i32x8 = c.as_i32x8();
6337 let r: f32x8 = vfixupimmps256(a, b, c, IMM8, k);
6338 transmute(r)
6339 }
6340}
6341
6342/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6343///
6344/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fixupimm_ps&expand=2498)
6345#[inline]
6346#[target_feature(enable = "avx512f,avx512vl")]
6347#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6348#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6349#[rustc_legacy_const_generics(4)]
6350pub fn _mm256_maskz_fixupimm_ps<const IMM8: i32>(
6351 k: __mmask8,
6352 a: __m256,
6353 b: __m256,
6354 c: __m256i,
6355) -> __m256 {
6356 unsafe {
6357 static_assert_uimm_bits!(IMM8, 8);
6358 let a: f32x8 = a.as_f32x8();
6359 let b: f32x8 = b.as_f32x8();
6360 let c: i32x8 = c.as_i32x8();
6361 let r: f32x8 = vfixupimmpsz256(a, b, c, IMM8, k);
6362 transmute(r)
6363 }
6364}
6365
6366/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6367///
6368/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fixupimm_ps&expand=2493)
6369#[inline]
6370#[target_feature(enable = "avx512f,avx512vl")]
6371#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6372#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6373#[rustc_legacy_const_generics(3)]
6374pub fn _mm_fixupimm_ps<const IMM8: i32>(a: __m128, b: __m128, c: __m128i) -> __m128 {
6375 unsafe {
6376 static_assert_uimm_bits!(IMM8, 8);
6377 let a: f32x4 = a.as_f32x4();
6378 let b: f32x4 = b.as_f32x4();
6379 let c: i32x4 = c.as_i32x4();
6380 let r: f32x4 = vfixupimmps128(a, b, c, IMM8, 0b00001111);
6381 transmute(r)
6382 }
6383}
6384
6385/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6386///
6387/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fixupimm_ps&expand=2494)
6388#[inline]
6389#[target_feature(enable = "avx512f,avx512vl")]
6390#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6391#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6392#[rustc_legacy_const_generics(4)]
6393pub fn _mm_mask_fixupimm_ps<const IMM8: i32>(
6394 a: __m128,
6395 k: __mmask8,
6396 b: __m128,
6397 c: __m128i,
6398) -> __m128 {
6399 unsafe {
6400 static_assert_uimm_bits!(IMM8, 8);
6401 let a: f32x4 = a.as_f32x4();
6402 let b: f32x4 = b.as_f32x4();
6403 let c: i32x4 = c.as_i32x4();
6404 let r: f32x4 = vfixupimmps128(a, b, c, IMM8, k);
6405 transmute(r)
6406 }
6407}
6408
6409/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6410///
6411/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fixupimm_ps&expand=2495)
6412#[inline]
6413#[target_feature(enable = "avx512f,avx512vl")]
6414#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6415#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6416#[rustc_legacy_const_generics(4)]
6417pub fn _mm_maskz_fixupimm_ps<const IMM8: i32>(
6418 k: __mmask8,
6419 a: __m128,
6420 b: __m128,
6421 c: __m128i,
6422) -> __m128 {
6423 unsafe {
6424 static_assert_uimm_bits!(IMM8, 8);
6425 let a: f32x4 = a.as_f32x4();
6426 let b: f32x4 = b.as_f32x4();
6427 let c: i32x4 = c.as_i32x4();
6428 let r: f32x4 = vfixupimmpsz128(a, b, c, IMM8, k);
6429 transmute(r)
6430 }
6431}
6432
6433/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6434///
6435/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_pd&expand=2490)
6436#[inline]
6437#[target_feature(enable = "avx512f")]
6438#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6439#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6440#[rustc_legacy_const_generics(3)]
6441pub fn _mm512_fixupimm_pd<const IMM8: i32>(a: __m512d, b: __m512d, c: __m512i) -> __m512d {
6442 unsafe {
6443 static_assert_uimm_bits!(IMM8, 8);
6444 let a: f64x8 = a.as_f64x8();
6445 let b: f64x8 = b.as_f64x8();
6446 let c: i64x8 = c.as_i64x8();
6447 let r: f64x8 = vfixupimmpd(a, b, c, IMM8, 0b11111111, _MM_FROUND_CUR_DIRECTION);
6448 transmute(r)
6449 }
6450}
6451
6452/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6453///
6454/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_pd&expand=2491)
6455#[inline]
6456#[target_feature(enable = "avx512f")]
6457#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6458#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6459#[rustc_legacy_const_generics(4)]
6460pub fn _mm512_mask_fixupimm_pd<const IMM8: i32>(
6461 a: __m512d,
6462 k: __mmask8,
6463 b: __m512d,
6464 c: __m512i,
6465) -> __m512d {
6466 unsafe {
6467 static_assert_uimm_bits!(IMM8, 8);
6468 let a: f64x8 = a.as_f64x8();
6469 let b: f64x8 = b.as_f64x8();
6470 let c: i64x8 = c.as_i64x8();
6471 let r: f64x8 = vfixupimmpd(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
6472 transmute(r)
6473 }
6474}
6475
6476/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6477///
6478/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_pd&expand=2492)
6479#[inline]
6480#[target_feature(enable = "avx512f")]
6481#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6482#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6483#[rustc_legacy_const_generics(4)]
6484pub fn _mm512_maskz_fixupimm_pd<const IMM8: i32>(
6485 k: __mmask8,
6486 a: __m512d,
6487 b: __m512d,
6488 c: __m512i,
6489) -> __m512d {
6490 unsafe {
6491 static_assert_uimm_bits!(IMM8, 8);
6492 let a: f64x8 = a.as_f64x8();
6493 let b: f64x8 = b.as_f64x8();
6494 let c: i64x8 = c.as_i64x8();
6495 let r: f64x8 = vfixupimmpdz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
6496 transmute(r)
6497 }
6498}
6499
6500/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6501///
6502/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fixupimm_pd&expand=2487)
6503#[inline]
6504#[target_feature(enable = "avx512f,avx512vl")]
6505#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6506#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6507#[rustc_legacy_const_generics(3)]
6508pub fn _mm256_fixupimm_pd<const IMM8: i32>(a: __m256d, b: __m256d, c: __m256i) -> __m256d {
6509 unsafe {
6510 static_assert_uimm_bits!(IMM8, 8);
6511 let a: f64x4 = a.as_f64x4();
6512 let b: f64x4 = b.as_f64x4();
6513 let c: i64x4 = c.as_i64x4();
6514 let r: f64x4 = vfixupimmpd256(a, b, c, IMM8, 0b00001111);
6515 transmute(r)
6516 }
6517}
6518
6519/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6520///
6521/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fixupimm_pd&expand=2488)
6522#[inline]
6523#[target_feature(enable = "avx512f,avx512vl")]
6524#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6525#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6526#[rustc_legacy_const_generics(4)]
6527pub fn _mm256_mask_fixupimm_pd<const IMM8: i32>(
6528 a: __m256d,
6529 k: __mmask8,
6530 b: __m256d,
6531 c: __m256i,
6532) -> __m256d {
6533 unsafe {
6534 static_assert_uimm_bits!(IMM8, 8);
6535 let a: f64x4 = a.as_f64x4();
6536 let b: f64x4 = b.as_f64x4();
6537 let c: i64x4 = c.as_i64x4();
6538 let r: f64x4 = vfixupimmpd256(a, b, c, IMM8, k);
6539 transmute(r)
6540 }
6541}
6542
6543/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6544///
6545/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fixupimm_pd&expand=2489)
6546#[inline]
6547#[target_feature(enable = "avx512f,avx512vl")]
6548#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6549#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6550#[rustc_legacy_const_generics(4)]
6551pub fn _mm256_maskz_fixupimm_pd<const IMM8: i32>(
6552 k: __mmask8,
6553 a: __m256d,
6554 b: __m256d,
6555 c: __m256i,
6556) -> __m256d {
6557 unsafe {
6558 static_assert_uimm_bits!(IMM8, 8);
6559 let a: f64x4 = a.as_f64x4();
6560 let b: f64x4 = b.as_f64x4();
6561 let c: i64x4 = c.as_i64x4();
6562 let r: f64x4 = vfixupimmpdz256(a, b, c, IMM8, k);
6563 transmute(r)
6564 }
6565}
6566
6567/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6568///
6569/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fixupimm_pd&expand=2484)
6570#[inline]
6571#[target_feature(enable = "avx512f,avx512vl")]
6572#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6573#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6574#[rustc_legacy_const_generics(3)]
6575pub fn _mm_fixupimm_pd<const IMM8: i32>(a: __m128d, b: __m128d, c: __m128i) -> __m128d {
6576 unsafe {
6577 static_assert_uimm_bits!(IMM8, 8);
6578 let a: f64x2 = a.as_f64x2();
6579 let b: f64x2 = b.as_f64x2();
6580 let c: i64x2 = c.as_i64x2();
6581 let r: f64x2 = vfixupimmpd128(a, b, c, IMM8, 0b00000011);
6582 transmute(r)
6583 }
6584}
6585
6586/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6587///
6588/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fixupimm_pd&expand=2485)
6589#[inline]
6590#[target_feature(enable = "avx512f,avx512vl")]
6591#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6592#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6593#[rustc_legacy_const_generics(4)]
6594pub fn _mm_mask_fixupimm_pd<const IMM8: i32>(
6595 a: __m128d,
6596 k: __mmask8,
6597 b: __m128d,
6598 c: __m128i,
6599) -> __m128d {
6600 unsafe {
6601 static_assert_uimm_bits!(IMM8, 8);
6602 let a: f64x2 = a.as_f64x2();
6603 let b: f64x2 = b.as_f64x2();
6604 let c: i64x2 = c.as_i64x2();
6605 let r: f64x2 = vfixupimmpd128(a, b, c, IMM8, k);
6606 transmute(r)
6607 }
6608}
6609
6610/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6611///
6612/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fixupimm_pd&expand=2486)
6613#[inline]
6614#[target_feature(enable = "avx512f,avx512vl")]
6615#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6616#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6617#[rustc_legacy_const_generics(4)]
6618pub fn _mm_maskz_fixupimm_pd<const IMM8: i32>(
6619 k: __mmask8,
6620 a: __m128d,
6621 b: __m128d,
6622 c: __m128i,
6623) -> __m128d {
6624 unsafe {
6625 static_assert_uimm_bits!(IMM8, 8);
6626 let a: f64x2 = a.as_f64x2();
6627 let b: f64x2 = b.as_f64x2();
6628 let c: i64x2 = c.as_i64x2();
6629 let r: f64x2 = vfixupimmpdz128(a, b, c, IMM8, k);
6630 transmute(r)
6631 }
6632}
6633
6634/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the value at that index in imm8 is written to the corresponding bit in dst.
6635///
6636/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ternarylogic_epi32&expand=5867)
6637#[inline]
6638#[target_feature(enable = "avx512f")]
6639#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6640#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6641#[rustc_legacy_const_generics(3)]
6642pub fn _mm512_ternarylogic_epi32<const IMM8: i32>(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
6643 unsafe {
6644 static_assert_uimm_bits!(IMM8, 8);
6645 let a: i32x16 = a.as_i32x16();
6646 let b: i32x16 = b.as_i32x16();
6647 let c: i32x16 = c.as_i32x16();
6648 let r: i32x16 = vpternlogd(a, b, c, IMM8);
6649 transmute(r)
6650 }
6651}
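
// A small worked example of the truth-table encoding, assuming Intel's convention that
// the bit taken from `a` forms the most significant bit of the 3-bit index: IMM8 = 0x96
// (0b1001_0110) is set exactly at the odd-parity indices and therefore computes a
// three-way XOR, while IMM8 = 0xE8 yields the bitwise majority function:
//
//     let xor3 = _mm512_ternarylogic_epi32::<0x96>(a, b, c); // a ^ b ^ c
//     let maj  = _mm512_ternarylogic_epi32::<0xE8>(a, b, c); // (a & b) | (a & c) | (b & c)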
6652
6653/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bits from src, a, and b are used to form a 3-bit index into imm8, and the value at that index in imm8 is written to the corresponding bit in dst using writemask k at 32-bit granularity (32-bit elements are copied from src when the corresponding mask bit is not set).
6654///
6655/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ternarylogic_epi32&expand=5865)
6656#[inline]
6657#[target_feature(enable = "avx512f")]
6658#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6659#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6660#[rustc_legacy_const_generics(4)]
6661pub fn _mm512_mask_ternarylogic_epi32<const IMM8: i32>(
6662 src: __m512i,
6663 k: __mmask16,
6664 a: __m512i,
6665 b: __m512i,
6666) -> __m512i {
6667 unsafe {
6668 static_assert_uimm_bits!(IMM8, 8);
6669 let src: i32x16 = src.as_i32x16();
6670 let a: i32x16 = a.as_i32x16();
6671 let b: i32x16 = b.as_i32x16();
6672 let r: i32x16 = vpternlogd(src, a, b, IMM8);
6673 transmute(simd_select_bitmask(k, r, src))
6674 }
6675}
6676
6677/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the value at that index in imm8 is written to the corresponding bit in dst using zeromask k at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set).
6678///
6679/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ternarylogic_epi32&expand=5866)
6680#[inline]
6681#[target_feature(enable = "avx512f")]
6682#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6683#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6684#[rustc_legacy_const_generics(4)]
6685pub fn _mm512_maskz_ternarylogic_epi32<const IMM8: i32>(
6686 k: __mmask16,
6687 a: __m512i,
6688 b: __m512i,
6689 c: __m512i,
6690) -> __m512i {
6691 unsafe {
6692 static_assert_uimm_bits!(IMM8, 8);
6693 let a: i32x16 = a.as_i32x16();
6694 let b: i32x16 = b.as_i32x16();
6695 let c: i32x16 = c.as_i32x16();
6696 let r: i32x16 = vpternlogd(a, b, c, IMM8);
6697 transmute(simd_select_bitmask(k, r, i32x16::ZERO))
6698 }
6699}
6700
6701/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the value at that index in imm8 is written to the corresponding bit in dst.
6702///
6703/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ternarylogic_epi32&expand=5864)
6704#[inline]
6705#[target_feature(enable = "avx512f,avx512vl")]
6706#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6707#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6708#[rustc_legacy_const_generics(3)]
6709pub fn _mm256_ternarylogic_epi32<const IMM8: i32>(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
6710 unsafe {
6711 static_assert_uimm_bits!(IMM8, 8);
6712 let a: i32x8 = a.as_i32x8();
6713 let b: i32x8 = b.as_i32x8();
6714 let c: i32x8 = c.as_i32x8();
6715 let r: i32x8 = vpternlogd256(a, b, c, IMM8);
6716 transmute(r)
6717 }
6718}
6719
6720/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bits from src, a, and b are used to form a 3-bit index into imm8, and the value at that index in imm8 is written to the corresponding bit in dst using writemask k at 32-bit granularity (32-bit elements are copied from src when the corresponding mask bit is not set).
6721///
6722/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ternarylogic_epi32&expand=5862)
6723#[inline]
6724#[target_feature(enable = "avx512f,avx512vl")]
6725#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6726#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6727#[rustc_legacy_const_generics(4)]
6728pub fn _mm256_mask_ternarylogic_epi32<const IMM8: i32>(
6729 src: __m256i,
6730 k: __mmask8,
6731 a: __m256i,
6732 b: __m256i,
6733) -> __m256i {
6734 unsafe {
6735 static_assert_uimm_bits!(IMM8, 8);
6736 let src: i32x8 = src.as_i32x8();
6737 let a: i32x8 = a.as_i32x8();
6738 let b: i32x8 = b.as_i32x8();
6739 let r: i32x8 = vpternlogd256(src, a, b, IMM8);
6740 transmute(simd_select_bitmask(k, r, src))
6741 }
6742}
6743
6744/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the value at that index in imm8 is written to the corresponding bit in dst using zeromask k at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set).
6745///
6746/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ternarylogic_epi32&expand=5863)
6747#[inline]
6748#[target_feature(enable = "avx512f,avx512vl")]
6749#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6750#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6751#[rustc_legacy_const_generics(4)]
6752pub fn _mm256_maskz_ternarylogic_epi32<const IMM8: i32>(
6753 k: __mmask8,
6754 a: __m256i,
6755 b: __m256i,
6756 c: __m256i,
6757) -> __m256i {
6758 unsafe {
6759 static_assert_uimm_bits!(IMM8, 8);
6760 let a: i32x8 = a.as_i32x8();
6761 let b: i32x8 = b.as_i32x8();
6762 let c: i32x8 = c.as_i32x8();
6763 let r: i32x8 = vpternlogd256(a, b, c, IMM8);
6764 transmute(simd_select_bitmask(k, r, i32x8::ZERO))
6765 }
6766}
6767
6768/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the value at that index in imm8 is written to the corresponding bit in dst.
6769///
6770/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ternarylogic_epi32&expand=5861)
6771#[inline]
6772#[target_feature(enable = "avx512f,avx512vl")]
6773#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6774#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6775#[rustc_legacy_const_generics(3)]
6776pub fn _mm_ternarylogic_epi32<const IMM8: i32>(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
6777 unsafe {
6778 static_assert_uimm_bits!(IMM8, 8);
6779 let a: i32x4 = a.as_i32x4();
6780 let b: i32x4 = b.as_i32x4();
6781 let c: i32x4 = c.as_i32x4();
6782 let r: i32x4 = vpternlogd128(a, b, c, IMM8);
6783 transmute(r)
6784 }
6785}
6786
6787/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bits from src, a, and b are used to form a 3-bit index into imm8, and the value at that index in imm8 is written to the corresponding bit in dst using writemask k at 32-bit granularity (32-bit elements are copied from src when the corresponding mask bit is not set).
6788///
6789/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ternarylogic_epi32&expand=5859)
6790#[inline]
6791#[target_feature(enable = "avx512f,avx512vl")]
6792#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6793#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6794#[rustc_legacy_const_generics(4)]
6795pub fn _mm_mask_ternarylogic_epi32<const IMM8: i32>(
6796 src: __m128i,
6797 k: __mmask8,
6798 a: __m128i,
6799 b: __m128i,
6800) -> __m128i {
6801 unsafe {
6802 static_assert_uimm_bits!(IMM8, 8);
6803 let src: i32x4 = src.as_i32x4();
6804 let a: i32x4 = a.as_i32x4();
6805 let b: i32x4 = b.as_i32x4();
6806 let r: i32x4 = vpternlogd128(src, a, b, IMM8);
6807 transmute(simd_select_bitmask(k, r, src))
6808 }
6809}
6810
6811/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the value at that index in imm8 is written to the corresponding bit in dst using zeromask k at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set).
6812///
6813/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ternarylogic_epi32&expand=5860)
6814#[inline]
6815#[target_feature(enable = "avx512f,avx512vl")]
6816#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6817#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6818#[rustc_legacy_const_generics(4)]
6819pub fn _mm_maskz_ternarylogic_epi32<const IMM8: i32>(
6820 k: __mmask8,
6821 a: __m128i,
6822 b: __m128i,
6823 c: __m128i,
6824) -> __m128i {
6825 unsafe {
6826 static_assert_uimm_bits!(IMM8, 8);
6827 let a: i32x4 = a.as_i32x4();
6828 let b: i32x4 = b.as_i32x4();
6829 let c: i32x4 = c.as_i32x4();
6830 let r: i32x4 = vpternlogd128(a, b, c, IMM8);
6831 transmute(simd_select_bitmask(k, r, i32x4::ZERO))
6832 }
6833}
6834
6835/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the value at that index in imm8 is written to the corresponding bit in dst.
6836///
6837/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ternarylogic_epi64&expand=5876)
6838#[inline]
6839#[target_feature(enable = "avx512f")]
6840#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6841#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6842#[rustc_legacy_const_generics(3)]
6843pub fn _mm512_ternarylogic_epi64<const IMM8: i32>(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
6844 unsafe {
6845 static_assert_uimm_bits!(IMM8, 8);
6846 let a: i64x8 = a.as_i64x8();
6847 let b: i64x8 = b.as_i64x8();
6848 let c: i64x8 = c.as_i64x8();
6849 let r: i64x8 = vpternlogq(a, b, c, IMM8);
6850 transmute(src:r)
6851 }
6852}
6853
6854/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 64-bit granularity (64-bit elements are copied from src when the corresponding mask bit is not set).
6855///
6856/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ternarylogic_epi64&expand=5874)
6857#[inline]
6858#[target_feature(enable = "avx512f")]
6859#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6860#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6861#[rustc_legacy_const_generics(4)]
6862pub fn _mm512_mask_ternarylogic_epi64<const IMM8: i32>(
6863 src: __m512i,
6864 k: __mmask8,
6865 a: __m512i,
6866 b: __m512i,
6867) -> __m512i {
6868 unsafe {
6869 static_assert_uimm_bits!(IMM8, 8);
6870 let src: i64x8 = src.as_i64x8();
6871 let a: i64x8 = a.as_i64x8();
6872 let b: i64x8 = b.as_i64x8();
6873 let r: i64x8 = vpternlogq(a:src, b:a, c:b, IMM8);
6874 transmute(src:simd_select_bitmask(m:k, yes:r, no:src))
6875 }
6876}
6877
6878/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set).
6879///
6880/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ternarylogic_epi64&expand=5875)
6881#[inline]
6882#[target_feature(enable = "avx512f")]
6883#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6884#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6885#[rustc_legacy_const_generics(4)]
6886pub fn _mm512_maskz_ternarylogic_epi64<const IMM8: i32>(
6887 k: __mmask8,
6888 a: __m512i,
6889 b: __m512i,
6890 c: __m512i,
6891) -> __m512i {
6892 unsafe {
6893 static_assert_uimm_bits!(IMM8, 8);
6894 let a: i64x8 = a.as_i64x8();
6895 let b: i64x8 = b.as_i64x8();
6896 let c: i64x8 = c.as_i64x8();
6897 let r: i64x8 = vpternlogq(a, b, c, IMM8);
6898 transmute(src:simd_select_bitmask(m:k, yes:r, no:i64x8::ZERO))
6899 }
6900}
6901
6902/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
6903///
6904/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ternarylogic_epi64&expand=5873)
6905#[inline]
6906#[target_feature(enable = "avx512f,avx512vl")]
6907#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6908#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6909#[rustc_legacy_const_generics(3)]
6910pub fn _mm256_ternarylogic_epi64<const IMM8: i32>(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
6911 unsafe {
6912 static_assert_uimm_bits!(IMM8, 8);
6913 let a: i64x4 = a.as_i64x4();
6914 let b: i64x4 = b.as_i64x4();
6915 let c: i64x4 = c.as_i64x4();
6916 let r: i64x4 = vpternlogq256(a, b, c, IMM8);
6917 transmute(src:r)
6918 }
6919}
6920
6921/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 64-bit granularity (64-bit elements are copied from src when the corresponding mask bit is not set).
6922///
6923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ternarylogic_epi64&expand=5871)
6924#[inline]
6925#[target_feature(enable = "avx512f,avx512vl")]
6926#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6927#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6928#[rustc_legacy_const_generics(4)]
6929pub fn _mm256_mask_ternarylogic_epi64<const IMM8: i32>(
6930 src: __m256i,
6931 k: __mmask8,
6932 a: __m256i,
6933 b: __m256i,
6934) -> __m256i {
6935 unsafe {
6936 static_assert_uimm_bits!(IMM8, 8);
6937 let src: i64x4 = src.as_i64x4();
6938 let a: i64x4 = a.as_i64x4();
6939 let b: i64x4 = b.as_i64x4();
6940 let r: i64x4 = vpternlogq256(a:src, b:a, c:b, IMM8);
6941 transmute(src:simd_select_bitmask(m:k, yes:r, no:src))
6942 }
6943}
6944
6945/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set).
6946///
6947/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ternarylogic_epi64&expand=5872)
6948#[inline]
6949#[target_feature(enable = "avx512f,avx512vl")]
6950#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6951#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6952#[rustc_legacy_const_generics(4)]
6953pub fn _mm256_maskz_ternarylogic_epi64<const IMM8: i32>(
6954 k: __mmask8,
6955 a: __m256i,
6956 b: __m256i,
6957 c: __m256i,
6958) -> __m256i {
6959 unsafe {
6960 static_assert_uimm_bits!(IMM8, 8);
6961 let a: i64x4 = a.as_i64x4();
6962 let b: i64x4 = b.as_i64x4();
6963 let c: i64x4 = c.as_i64x4();
6964 let r: i64x4 = vpternlogq256(a, b, c, IMM8);
6965 transmute(src:simd_select_bitmask(m:k, yes:r, no:i64x4::ZERO))
6966 }
6967}
6968
6969/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
6970///
6971/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ternarylogic_epi64&expand=5870)
6972#[inline]
6973#[target_feature(enable = "avx512f,avx512vl")]
6974#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6975#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6976#[rustc_legacy_const_generics(3)]
6977pub fn _mm_ternarylogic_epi64<const IMM8: i32>(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
6978 unsafe {
6979 static_assert_uimm_bits!(IMM8, 8);
6980 let a: i64x2 = a.as_i64x2();
6981 let b: i64x2 = b.as_i64x2();
6982 let c: i64x2 = c.as_i64x2();
6983 let r: i64x2 = vpternlogq128(a, b, c, IMM8);
6984 transmute(src:r)
6985 }
6986}
6987
6988/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 64-bit granularity (64-bit elements are copied from src when the corresponding mask bit is not set).
6989///
6990/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ternarylogic_epi64&expand=5868)
6991#[inline]
6992#[target_feature(enable = "avx512f,avx512vl")]
6993#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6994#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6995#[rustc_legacy_const_generics(4)]
6996pub fn _mm_mask_ternarylogic_epi64<const IMM8: i32>(
6997 src: __m128i,
6998 k: __mmask8,
6999 a: __m128i,
7000 b: __m128i,
7001) -> __m128i {
7002 unsafe {
7003 static_assert_uimm_bits!(IMM8, 8);
7004 let src: i64x2 = src.as_i64x2();
7005 let a: i64x2 = a.as_i64x2();
7006 let b: i64x2 = b.as_i64x2();
7007 let r: i64x2 = vpternlogq128(a:src, b:a, c:b, IMM8);
7008 transmute(src:simd_select_bitmask(m:k, yes:r, no:src))
7009 }
7010}
7011
7012/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set).
7013///
7014/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ternarylogic_epi64&expand=5869)
7015#[inline]
7016#[target_feature(enable = "avx512f,avx512vl")]
7017#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7018#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
7019#[rustc_legacy_const_generics(4)]
7020pub fn _mm_maskz_ternarylogic_epi64<const IMM8: i32>(
7021 k: __mmask8,
7022 a: __m128i,
7023 b: __m128i,
7024 c: __m128i,
7025) -> __m128i {
7026 unsafe {
7027 static_assert_uimm_bits!(IMM8, 8);
7028 let a: i64x2 = a.as_i64x2();
7029 let b: i64x2 = b.as_i64x2();
7030 let c: i64x2 = c.as_i64x2();
7031 let r: i64x2 = vpternlogq128(a, b, c, IMM8);
7032 transmute(src:simd_select_bitmask(m:k, yes:r, no:i64x2::ZERO))
7033 }
7034}
7035
7036/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.
7037/// The mantissa is normalized to the interval specified by interv, which can take the following values:
7038/// _MM_MANT_NORM_1_2 // interval [1, 2)
7039/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
7040/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
7041/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
7042/// The sign is determined by sc which can take the following values:
7043/// _MM_MANT_SIGN_src // sign = sign(src)
7044/// _MM_MANT_SIGN_zero // sign = 0
7045/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7046///
7047/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_ps&expand=2880)
7048#[inline]
7049#[target_feature(enable = "avx512f")]
7050#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7051#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7052#[rustc_legacy_const_generics(1, 2)]
7053pub fn _mm512_getmant_ps<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7054 a: __m512,
7055) -> __m512 {
7056 unsafe {
7057 static_assert_uimm_bits!(NORM, 4);
7058 static_assert_uimm_bits!(SIGN, 2);
7059 let a: f32x16 = a.as_f32x16();
7060 let zero: f32x16 = f32x16::ZERO;
7061 let r: f32x16 = vgetmantps(
7062 a,
7063 SIGN << 2 | NORM,
7064 src:zero,
7065 m:0b11111111_11111111,
7066 _MM_FROUND_CUR_DIRECTION,
7067 );
7068 transmute(src:r)
7069 }
7070}
7071
7072/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7073/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7074/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7075/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7076/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7077/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7078/// The sign is determined by sc which can take the following values:\
7079/// _MM_MANT_SIGN_src // sign = sign(src)\
7080/// _MM_MANT_SIGN_zero // sign = 0\
7081/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7082///
7083/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_ps&expand=2881)
7084#[inline]
7085#[target_feature(enable = "avx512f")]
7086#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7087#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7088#[rustc_legacy_const_generics(3, 4)]
7089pub fn _mm512_mask_getmant_ps<
7090 const NORM: _MM_MANTISSA_NORM_ENUM,
7091 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7092>(
7093 src: __m512,
7094 k: __mmask16,
7095 a: __m512,
7096) -> __m512 {
7097 unsafe {
7098 static_assert_uimm_bits!(NORM, 4);
7099 static_assert_uimm_bits!(SIGN, 2);
7100 let a: f32x16 = a.as_f32x16();
7101 let src: f32x16 = src.as_f32x16();
7102 let r: f32x16 = vgetmantps(a, SIGN << 2 | NORM, src, m:k, _MM_FROUND_CUR_DIRECTION);
7103 transmute(src:r)
7104 }
7105}
7106
7107/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7108/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7109/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7110/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7111/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7112/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7113/// The sign is determined by sc which can take the following values:\
7114/// _MM_MANT_SIGN_src // sign = sign(src)\
7115/// _MM_MANT_SIGN_zero // sign = 0\
7116/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7117///
7118/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_ps&expand=2882)
7119#[inline]
7120#[target_feature(enable = "avx512f")]
7121#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7122#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7123#[rustc_legacy_const_generics(2, 3)]
7124pub fn _mm512_maskz_getmant_ps<
7125 const NORM: _MM_MANTISSA_NORM_ENUM,
7126 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7127>(
7128 k: __mmask16,
7129 a: __m512,
7130) -> __m512 {
7131 unsafe {
7132 static_assert_uimm_bits!(NORM, 4);
7133 static_assert_uimm_bits!(SIGN, 2);
7134 let a: f32x16 = a.as_f32x16();
7135 let r: f32x16 = vgetmantps(
7136 a,
7137 SIGN << 2 | NORM,
7138 src:f32x16::ZERO,
7139 m:k,
7140 _MM_FROUND_CUR_DIRECTION,
7141 );
7142 transmute(src:r)
7143 }
7144}
7145
7146/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.
7147/// The mantissa is normalized to the interval specified by interv, which can take the following values:
7148/// _MM_MANT_NORM_1_2 // interval [1, 2)
7149/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
7150/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
7151/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
7152/// The sign is determined by sc which can take the following values:
7153/// _MM_MANT_SIGN_src // sign = sign(src)
7154/// _MM_MANT_SIGN_zero // sign = 0
7155/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7156///
7157/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getmant_ps&expand=2877)
7158#[inline]
7159#[target_feature(enable = "avx512f,avx512vl")]
7160#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7161#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7162#[rustc_legacy_const_generics(1, 2)]
7163pub fn _mm256_getmant_ps<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7164 a: __m256,
7165) -> __m256 {
7166 unsafe {
7167 static_assert_uimm_bits!(NORM, 4);
7168 static_assert_uimm_bits!(SIGN, 2);
7169 let a: f32x8 = a.as_f32x8();
7170 let r: f32x8 = vgetmantps256(a, SIGN << 2 | NORM, src:f32x8::ZERO, m:0b11111111);
7171 transmute(src:r)
7172 }
7173}
7174
7175/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7176/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7177/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7178/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7179/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7180/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7181/// The sign is determined by sc which can take the following values:\
7182/// _MM_MANT_SIGN_src // sign = sign(src)\
7183/// _MM_MANT_SIGN_zero // sign = 0\
7184/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7185///
7186/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getmant_ps&expand=2878)
7187#[inline]
7188#[target_feature(enable = "avx512f,avx512vl")]
7189#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7190#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7191#[rustc_legacy_const_generics(3, 4)]
7192pub fn _mm256_mask_getmant_ps<
7193 const NORM: _MM_MANTISSA_NORM_ENUM,
7194 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7195>(
7196 src: __m256,
7197 k: __mmask8,
7198 a: __m256,
7199) -> __m256 {
7200 unsafe {
7201 static_assert_uimm_bits!(NORM, 4);
7202 static_assert_uimm_bits!(SIGN, 2);
7203 let a: f32x8 = a.as_f32x8();
7204 let src: f32x8 = src.as_f32x8();
7205 let r: f32x8 = vgetmantps256(a, SIGN << 2 | NORM, src, m:k);
7206 transmute(src:r)
7207 }
7208}
7209
7210/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7211/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7212/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7213/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7214/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7215/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7216/// The sign is determined by sc which can take the following values:\
7217/// _MM_MANT_SIGN_src // sign = sign(src)\
7218/// _MM_MANT_SIGN_zero // sign = 0\
7219/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7220///
7221/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getmant_ps&expand=2879)
7222#[inline]
7223#[target_feature(enable = "avx512f,avx512vl")]
7224#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7225#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7226#[rustc_legacy_const_generics(2, 3)]
7227pub fn _mm256_maskz_getmant_ps<
7228 const NORM: _MM_MANTISSA_NORM_ENUM,
7229 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7230>(
7231 k: __mmask8,
7232 a: __m256,
7233) -> __m256 {
7234 unsafe {
7235 static_assert_uimm_bits!(NORM, 4);
7236 static_assert_uimm_bits!(SIGN, 2);
7237 let a: f32x8 = a.as_f32x8();
7238 let r: f32x8 = vgetmantps256(a, SIGN << 2 | NORM, src:f32x8::ZERO, m:k);
7239 transmute(src:r)
7240 }
7241}
7242
7243/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.
7244/// The mantissa is normalized to the interval specified by interv, which can take the following values:
7245/// _MM_MANT_NORM_1_2 // interval [1, 2)
7246/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
7247/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
7248/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
7249/// The sign is determined by sc which can take the following values:
7250/// _MM_MANT_SIGN_src // sign = sign(src)
7251/// _MM_MANT_SIGN_zero // sign = 0
7252/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7253///
7254/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getmant_ps&expand=2874)
7255#[inline]
7256#[target_feature(enable = "avx512f,avx512vl")]
7257#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7258#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7259#[rustc_legacy_const_generics(1, 2)]
7260pub fn _mm_getmant_ps<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7261 a: __m128,
7262) -> __m128 {
7263 unsafe {
7264 static_assert_uimm_bits!(NORM, 4);
7265 static_assert_uimm_bits!(SIGN, 2);
7266 let a: f32x4 = a.as_f32x4();
7267 let r: f32x4 = vgetmantps128(a, SIGN << 2 | NORM, src:f32x4::ZERO, m:0b00001111);
7268 transmute(src:r)
7269 }
7270}
7271
7272/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7273/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7274/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7275/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7276/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7277/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7278/// The sign is determined by sc which can take the following values:\
7279/// _MM_MANT_SIGN_src // sign = sign(src)\
7280/// _MM_MANT_SIGN_zero // sign = 0\
7281/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7282///
7283/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getmant_ps&expand=2875)
7284#[inline]
7285#[target_feature(enable = "avx512f,avx512vl")]
7286#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7287#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7288#[rustc_legacy_const_generics(3, 4)]
7289pub fn _mm_mask_getmant_ps<
7290 const NORM: _MM_MANTISSA_NORM_ENUM,
7291 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7292>(
7293 src: __m128,
7294 k: __mmask8,
7295 a: __m128,
7296) -> __m128 {
7297 unsafe {
7298 static_assert_uimm_bits!(NORM, 4);
7299 static_assert_uimm_bits!(SIGN, 2);
7300 let a: f32x4 = a.as_f32x4();
7301 let src: f32x4 = src.as_f32x4();
7302 let r: f32x4 = vgetmantps128(a, SIGN << 2 | NORM, src, m:k);
7303 transmute(src:r)
7304 }
7305}
7306
7307/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7308/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7309/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7310/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7311/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7312/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7313/// The sign is determined by sc which can take the following values:\
7314/// _MM_MANT_SIGN_src // sign = sign(src)\
7315/// _MM_MANT_SIGN_zero // sign = 0\
7316/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7317///
7318/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getmant_ps&expand=2876)
7319#[inline]
7320#[target_feature(enable = "avx512f,avx512vl")]
7321#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7322#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7323#[rustc_legacy_const_generics(2, 3)]
7324pub fn _mm_maskz_getmant_ps<
7325 const NORM: _MM_MANTISSA_NORM_ENUM,
7326 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7327>(
7328 k: __mmask8,
7329 a: __m128,
7330) -> __m128 {
7331 unsafe {
7332 static_assert_uimm_bits!(NORM, 4);
7333 static_assert_uimm_bits!(SIGN, 2);
7334 let a: f32x4 = a.as_f32x4();
7335 let r: f32x4 = vgetmantps128(a, SIGN << 2 | NORM, src:f32x4::ZERO, m:k);
7336 transmute(src:r)
7337 }
7338}
7339
7340/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7341/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7342/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7343/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7344/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7345/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7346/// The sign is determined by sc which can take the following values:\
7347/// _MM_MANT_SIGN_src // sign = sign(src)\
7348/// _MM_MANT_SIGN_zero // sign = 0\
7349/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7350///
7351/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_pd&expand=2871)
7352#[inline]
7353#[target_feature(enable = "avx512f")]
7354#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7355#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7356#[rustc_legacy_const_generics(1, 2)]
7357pub fn _mm512_getmant_pd<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7358 a: __m512d,
7359) -> __m512d {
7360 unsafe {
7361 static_assert_uimm_bits!(NORM, 4);
7362 static_assert_uimm_bits!(SIGN, 2);
7363 let a: f64x8 = a.as_f64x8();
7364 let zero: f64x8 = f64x8::ZERO;
7365 let r: f64x8 = vgetmantpd(
7366 a,
7367 SIGN << 2 | NORM,
7368 src:zero,
7369 m:0b11111111,
7370 _MM_FROUND_CUR_DIRECTION,
7371 );
7372 transmute(src:r)
7373 }
7374}
7375
7376/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7377/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7378/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7379/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7380/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7381/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7382/// The sign is determined by sc which can take the following values:\
7383/// _MM_MANT_SIGN_src // sign = sign(src)\
7384/// _MM_MANT_SIGN_zero // sign = 0\
7385/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7386///
7387/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_pd&expand=2872)
7388#[inline]
7389#[target_feature(enable = "avx512f")]
7390#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7391#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7392#[rustc_legacy_const_generics(3, 4)]
7393pub fn _mm512_mask_getmant_pd<
7394 const NORM: _MM_MANTISSA_NORM_ENUM,
7395 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7396>(
7397 src: __m512d,
7398 k: __mmask8,
7399 a: __m512d,
7400) -> __m512d {
7401 unsafe {
7402 static_assert_uimm_bits!(NORM, 4);
7403 static_assert_uimm_bits!(SIGN, 2);
7404 let a: f64x8 = a.as_f64x8();
7405 let src: f64x8 = src.as_f64x8();
7406 let r: f64x8 = vgetmantpd(a, SIGN << 2 | NORM, src, m:k, _MM_FROUND_CUR_DIRECTION);
7407 transmute(src:r)
7408 }
7409}
7410
7411/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7412/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7413/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7414/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7415/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7416/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7417/// The sign is determined by sc which can take the following values:\
7418/// _MM_MANT_SIGN_src // sign = sign(src)\
7419/// _MM_MANT_SIGN_zero // sign = 0\
7420/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7421///
7422/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_pd&expand=2873)
7423#[inline]
7424#[target_feature(enable = "avx512f")]
7425#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7426#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7427#[rustc_legacy_const_generics(2, 3)]
7428pub fn _mm512_maskz_getmant_pd<
7429 const NORM: _MM_MANTISSA_NORM_ENUM,
7430 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7431>(
7432 k: __mmask8,
7433 a: __m512d,
7434) -> __m512d {
7435 unsafe {
7436 static_assert_uimm_bits!(NORM, 4);
7437 static_assert_uimm_bits!(SIGN, 2);
7438 let a: f64x8 = a.as_f64x8();
7439 let r: f64x8 = vgetmantpd(
7440 a,
7441 SIGN << 2 | NORM,
7442 src:f64x8::ZERO,
7443 m:k,
7444 _MM_FROUND_CUR_DIRECTION,
7445 );
7446 transmute(src:r)
7447 }
7448}
7449
7450/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7451/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7452/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7453/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7454/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7455/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7456/// The sign is determined by sc which can take the following values:\
7457/// _MM_MANT_SIGN_src // sign = sign(src)\
7458/// _MM_MANT_SIGN_zero // sign = 0\
7459/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7460///
7461/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getmant_pd&expand=2868)
7462#[inline]
7463#[target_feature(enable = "avx512f,avx512vl")]
7464#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7465#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7466#[rustc_legacy_const_generics(1, 2)]
7467pub fn _mm256_getmant_pd<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7468 a: __m256d,
7469) -> __m256d {
7470 unsafe {
7471 static_assert_uimm_bits!(NORM, 4);
7472 static_assert_uimm_bits!(SIGN, 2);
7473 let a: f64x4 = a.as_f64x4();
7474 let r: f64x4 = vgetmantpd256(a, SIGN << 2 | NORM, src:f64x4::ZERO, m:0b00001111);
7475 transmute(src:r)
7476 }
7477}
7478
7479/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7480/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7481/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7482/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7483/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7484/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7485/// The sign is determined by sc which can take the following values:\
7486/// _MM_MANT_SIGN_src // sign = sign(src)\
7487/// _MM_MANT_SIGN_zero // sign = 0\
7488/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7489///
7490/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getmant_pd&expand=2869)
7491#[inline]
7492#[target_feature(enable = "avx512f,avx512vl")]
7493#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7494#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7495#[rustc_legacy_const_generics(3, 4)]
7496pub fn _mm256_mask_getmant_pd<
7497 const NORM: _MM_MANTISSA_NORM_ENUM,
7498 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7499>(
7500 src: __m256d,
7501 k: __mmask8,
7502 a: __m256d,
7503) -> __m256d {
7504 unsafe {
7505 static_assert_uimm_bits!(NORM, 4);
7506 static_assert_uimm_bits!(SIGN, 2);
7507 let a: f64x4 = a.as_f64x4();
7508 let src: f64x4 = src.as_f64x4();
7509 let r: f64x4 = vgetmantpd256(a, SIGN << 2 | NORM, src, m:k);
7510 transmute(src:r)
7511 }
7512}
7513
7514/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7515/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7516/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7517/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7518/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7519/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7520/// The sign is determined by sc which can take the following values:\
7521/// _MM_MANT_SIGN_src // sign = sign(src)\
7522/// _MM_MANT_SIGN_zero // sign = 0\
7523/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7524///
7525/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getmant_pd&expand=2870)
7526#[inline]
7527#[target_feature(enable = "avx512f,avx512vl")]
7528#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7529#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7530#[rustc_legacy_const_generics(2, 3)]
7531pub fn _mm256_maskz_getmant_pd<
7532 const NORM: _MM_MANTISSA_NORM_ENUM,
7533 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7534>(
7535 k: __mmask8,
7536 a: __m256d,
7537) -> __m256d {
7538 unsafe {
7539 static_assert_uimm_bits!(NORM, 4);
7540 static_assert_uimm_bits!(SIGN, 2);
7541 let a: f64x4 = a.as_f64x4();
7542 let r: f64x4 = vgetmantpd256(a, SIGN << 2 | NORM, src:f64x4::ZERO, m:k);
7543 transmute(src:r)
7544 }
7545}
7546
7547/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7548/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7549/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7550/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7551/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7552/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7553/// The sign is determined by sc which can take the following values:\
7554/// _MM_MANT_SIGN_src // sign = sign(src)\
7555/// _MM_MANT_SIGN_zero // sign = 0\
7556/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7557///
7558/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getmant_pd&expand=2865)
7559#[inline]
7560#[target_feature(enable = "avx512f,avx512vl")]
7561#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7562#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7563#[rustc_legacy_const_generics(1, 2)]
7564pub fn _mm_getmant_pd<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7565 a: __m128d,
7566) -> __m128d {
7567 unsafe {
7568 static_assert_uimm_bits!(NORM, 4);
7569 static_assert_uimm_bits!(SIGN, 2);
7570 let a: f64x2 = a.as_f64x2();
7571 let r: f64x2 = vgetmantpd128(a, SIGN << 2 | NORM, src:f64x2::ZERO, m:0b00000011);
7572 transmute(src:r)
7573 }
7574}
7575
7576/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7577/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7578/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7579/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7580/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7581/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7582/// The sign is determined by sc which can take the following values:\
7583/// _MM_MANT_SIGN_src // sign = sign(src)\
7584/// _MM_MANT_SIGN_zero // sign = 0\
7585/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7586///
7587/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getmant_pd&expand=2866)
7588#[inline]
7589#[target_feature(enable = "avx512f,avx512vl")]
7590#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7591#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7592#[rustc_legacy_const_generics(3, 4)]
7593pub fn _mm_mask_getmant_pd<
7594 const NORM: _MM_MANTISSA_NORM_ENUM,
7595 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7596>(
7597 src: __m128d,
7598 k: __mmask8,
7599 a: __m128d,
7600) -> __m128d {
7601 unsafe {
7602 static_assert_uimm_bits!(NORM, 4);
7603 static_assert_uimm_bits!(SIGN, 2);
7604 let a: f64x2 = a.as_f64x2();
7605 let src: f64x2 = src.as_f64x2();
7606 let r: f64x2 = vgetmantpd128(a, SIGN << 2 | NORM, src, m:k);
7607 transmute(src:r)
7608 }
7609}
7610
7611/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7612/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7613/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7614/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7615/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7616/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7617/// The sign is determined by sc which can take the following values:\
7618/// _MM_MANT_SIGN_src // sign = sign(src)\
7619/// _MM_MANT_SIGN_zero // sign = 0\
7620/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7621///
7622/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getmant_pd&expand=2867)
7623#[inline]
7624#[target_feature(enable = "avx512f,avx512vl")]
7625#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7626#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7627#[rustc_legacy_const_generics(2, 3)]
7628pub fn _mm_maskz_getmant_pd<
7629 const NORM: _MM_MANTISSA_NORM_ENUM,
7630 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7631>(
7632 k: __mmask8,
7633 a: __m128d,
7634) -> __m128d {
7635 unsafe {
7636 static_assert_uimm_bits!(NORM, 4);
7637 static_assert_uimm_bits!(SIGN, 2);
7638 let a: f64x2 = a.as_f64x2();
7639 let r: f64x2 = vgetmantpd128(a, SIGN << 2 | NORM, src:f64x2::ZERO, m:k);
7640 transmute(src:r)
7641 }
7642}
7643
7644/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.\
7645///
7646/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7647/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7648/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7649/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7650/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7651/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7652///
7653/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_round_ps&expand=145)
7654#[inline]
7655#[target_feature(enable = "avx512f")]
7656#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7657#[cfg_attr(test, assert_instr(vaddps, ROUNDING = 8))]
7658#[rustc_legacy_const_generics(2)]
7659pub fn _mm512_add_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
7660 unsafe {
7661 static_assert_rounding!(ROUNDING);
7662 let a: f32x16 = a.as_f32x16();
7663 let b: f32x16 = b.as_f32x16();
7664 let r: f32x16 = vaddps(a, b, ROUNDING);
7665 transmute(src:r)
7666 }
7667}
7668
7669/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
7670///
7671/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7672/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7673/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7674/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7675/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7676/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7677///
7678/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_round_ps&expand=146)
7679#[inline]
7680#[target_feature(enable = "avx512f")]
7681#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7682#[cfg_attr(test, assert_instr(vaddps, ROUNDING = 8))]
7683#[rustc_legacy_const_generics(4)]
7684pub fn _mm512_mask_add_round_ps<const ROUNDING: i32>(
7685 src: __m512,
7686 k: __mmask16,
7687 a: __m512,
7688 b: __m512,
7689) -> __m512 {
7690 unsafe {
7691 static_assert_rounding!(ROUNDING);
7692 let a: f32x16 = a.as_f32x16();
7693 let b: f32x16 = b.as_f32x16();
7694 let r: f32x16 = vaddps(a, b, ROUNDING);
7695 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_f32x16()))
7696 }
7697}
7698
7699/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
7700///
7701/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7702/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7703/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7704/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7705/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7706/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7707///
7708/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_round_ps&expand=147)
7709#[inline]
7710#[target_feature(enable = "avx512f")]
7711#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7712#[cfg_attr(test, assert_instr(vaddps, ROUNDING = 8))]
7713#[rustc_legacy_const_generics(3)]
7714pub fn _mm512_maskz_add_round_ps<const ROUNDING: i32>(
7715 k: __mmask16,
7716 a: __m512,
7717 b: __m512,
7718) -> __m512 {
7719 unsafe {
7720 static_assert_rounding!(ROUNDING);
7721 let a: f32x16 = a.as_f32x16();
7722 let b: f32x16 = b.as_f32x16();
7723 let r: f32x16 = vaddps(a, b, ROUNDING);
7724 transmute(src:simd_select_bitmask(m:k, yes:r, no:f32x16::ZERO))
7725 }
7726}
7727
7728/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.\
7729///
7730/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7731/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7732/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7733/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7734/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7735/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7736///
7737/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_round_pd&expand=142)
7738#[inline]
7739#[target_feature(enable = "avx512f")]
7740#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7741#[cfg_attr(test, assert_instr(vaddpd, ROUNDING = 8))]
7742#[rustc_legacy_const_generics(2)]
7743pub fn _mm512_add_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
7744 unsafe {
7745 static_assert_rounding!(ROUNDING);
7746 let a: f64x8 = a.as_f64x8();
7747 let b: f64x8 = b.as_f64x8();
7748 let r: f64x8 = vaddpd(a, b, ROUNDING);
7749 transmute(src:r)
7750 }
7751}
7752
7753/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
7754///
7755/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7756/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7757/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7758/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7759/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7760/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7761///
7762/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_round_pd&expand=143)
7763#[inline]
7764#[target_feature(enable = "avx512f")]
7765#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7766#[cfg_attr(test, assert_instr(vaddpd, ROUNDING = 8))]
7767#[rustc_legacy_const_generics(4)]
7768pub fn _mm512_mask_add_round_pd<const ROUNDING: i32>(
7769 src: __m512d,
7770 k: __mmask8,
7771 a: __m512d,
7772 b: __m512d,
7773) -> __m512d {
7774 unsafe {
7775 static_assert_rounding!(ROUNDING);
7776 let a: f64x8 = a.as_f64x8();
7777 let b: f64x8 = b.as_f64x8();
7778 let r: f64x8 = vaddpd(a, b, ROUNDING);
7779 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_f64x8()))
7780 }
7781}
7782
7783/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
7784///
7785/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7786/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7787/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7788/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7789/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7790/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7791///
7792/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_round_pd&expand=144)
7793#[inline]
7794#[target_feature(enable = "avx512f")]
7795#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7796#[cfg_attr(test, assert_instr(vaddpd, ROUNDING = 8))]
7797#[rustc_legacy_const_generics(3)]
7798pub fn _mm512_maskz_add_round_pd<const ROUNDING: i32>(
7799 k: __mmask8,
7800 a: __m512d,
7801 b: __m512d,
7802) -> __m512d {
7803 unsafe {
7804 static_assert_rounding!(ROUNDING);
7805 let a: f64x8 = a.as_f64x8();
7806 let b: f64x8 = b.as_f64x8();
7807 let r: f64x8 = vaddpd(a, b, ROUNDING);
7808 transmute(src:simd_select_bitmask(m:k, yes:r, no:f64x8::ZERO))
7809 }
7810}
7811
7812/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst.\
7813///
7814/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7815/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7816/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7817/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7818/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7819/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7820///
7821/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_round_ps&expand=5739)
7822#[inline]
7823#[target_feature(enable = "avx512f")]
7824#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7825#[cfg_attr(test, assert_instr(vsubps, ROUNDING = 8))]
7826#[rustc_legacy_const_generics(2)]
7827pub fn _mm512_sub_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
7828 unsafe {
7829 static_assert_rounding!(ROUNDING);
7830 let a: f32x16 = a.as_f32x16();
7831 let b: f32x16 = b.as_f32x16();
7832 let r: f32x16 = vsubps(a, b, ROUNDING);
7833 transmute(src:r)
7834 }
7835}
7836
7837/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
7838///
7839/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7840/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7841/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7842/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7843/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7844/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7845///
7846/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_round_ps&expand=5737)
7847#[inline]
7848#[target_feature(enable = "avx512f")]
7849#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7850#[cfg_attr(test, assert_instr(vsubps, ROUNDING = 8))]
7851#[rustc_legacy_const_generics(4)]
7852pub fn _mm512_mask_sub_round_ps<const ROUNDING: i32>(
7853 src: __m512,
7854 k: __mmask16,
7855 a: __m512,
7856 b: __m512,
7857) -> __m512 {
7858 unsafe {
7859 static_assert_rounding!(ROUNDING);
7860 let a: f32x16 = a.as_f32x16();
7861 let b: f32x16 = b.as_f32x16();
7862 let r: f32x16 = vsubps(a, b, ROUNDING);
7863 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_f32x16()))
7864 }
7865}
7866
7867/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
7868///
7869/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7870/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7871/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7872/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7873/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7874/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7875///
7876/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_round_ps&expand=5738)
7877#[inline]
7878#[target_feature(enable = "avx512f")]
7879#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7880#[cfg_attr(test, assert_instr(vsubps, ROUNDING = 8))]
7881#[rustc_legacy_const_generics(3)]
7882pub fn _mm512_maskz_sub_round_ps<const ROUNDING: i32>(
7883 k: __mmask16,
7884 a: __m512,
7885 b: __m512,
7886) -> __m512 {
7887 unsafe {
7888 static_assert_rounding!(ROUNDING);
7889 let a: f32x16 = a.as_f32x16();
7890 let b: f32x16 = b.as_f32x16();
7891 let r: f32x16 = vsubps(a, b, ROUNDING);
7892 transmute(src:simd_select_bitmask(m:k, yes:r, no:f32x16::ZERO))
7893 }
7894}
7895
7896/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst.\
7897///
7898/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7899/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7900/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7901/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7902/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7903/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7904///
7905/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_round_pd&expand=5736)
7906#[inline]
7907#[target_feature(enable = "avx512f")]
7908#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7909#[cfg_attr(test, assert_instr(vsubpd, ROUNDING = 8))]
7910#[rustc_legacy_const_generics(2)]
7911pub fn _mm512_sub_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
7912 unsafe {
7913 static_assert_rounding!(ROUNDING);
7914 let a: f64x8 = a.as_f64x8();
7915 let b: f64x8 = b.as_f64x8();
7916 let r: f64x8 = vsubpd(a, b, ROUNDING);
7917 transmute(src:r)
7918 }
7919}
7920
7921/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
7922///
7923/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7924/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7925/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7926/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7927/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7928/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7929///
7930/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_round_pd&expand=5734)
7931#[inline]
7932#[target_feature(enable = "avx512f")]
7933#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7934#[cfg_attr(test, assert_instr(vsubpd, ROUNDING = 8))]
7935#[rustc_legacy_const_generics(4)]
7936pub fn _mm512_mask_sub_round_pd<const ROUNDING: i32>(
7937 src: __m512d,
7938 k: __mmask8,
7939 a: __m512d,
7940 b: __m512d,
7941) -> __m512d {
7942 unsafe {
7943 static_assert_rounding!(ROUNDING);
7944 let a: f64x8 = a.as_f64x8();
7945 let b: f64x8 = b.as_f64x8();
7946 let r: f64x8 = vsubpd(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
7948 }
7949}
7950
7951/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
7952///
7953/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7954/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7955/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7956/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7957/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7958/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7959///
7960/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_round_pd&expand=5735)
7961#[inline]
7962#[target_feature(enable = "avx512f")]
7963#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7964#[cfg_attr(test, assert_instr(vsubpd, ROUNDING = 8))]
7965#[rustc_legacy_const_generics(3)]
7966pub fn _mm512_maskz_sub_round_pd<const ROUNDING: i32>(
7967 k: __mmask8,
7968 a: __m512d,
7969 b: __m512d,
7970) -> __m512d {
7971 unsafe {
7972 static_assert_rounding!(ROUNDING);
7973 let a: f64x8 = a.as_f64x8();
7974 let b: f64x8 = b.as_f64x8();
7975 let r: f64x8 = vsubpd(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
7977 }
7978}
7979
7980/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.\
7981///
7982/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7983/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7984/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7985/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7986/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7987/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7988///
7989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_round_ps&expand=3940)
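///
/// A minimal usage sketch (marked `ignore`: it assumes an AVX-512 CPU and the
/// unstable `stdarch_x86_avx512` feature); the values are illustrative only:
///
/// ```ignore
/// # use core::arch::x86_64::*;
/// let a = _mm512_set1_ps(1.5);
/// let b = _mm512_set1_ps(2.0);
/// // Every lane becomes 3.0; round-to-nearest with exceptions suppressed.
/// let r = _mm512_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
/// ```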
7990#[inline]
7991#[target_feature(enable = "avx512f")]
7992#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7993#[cfg_attr(test, assert_instr(vmulps, ROUNDING = 8))]
7994#[rustc_legacy_const_generics(2)]
7995pub fn _mm512_mul_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
7996 unsafe {
7997 static_assert_rounding!(ROUNDING);
7998 let a: f32x16 = a.as_f32x16();
7999 let b: f32x16 = b.as_f32x16();
8000 let r: f32x16 = vmulps(a, b, ROUNDING);
        transmute(r)
8002 }
8003}
8004
8005/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8006///
8007/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8008/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8009/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8010/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8011/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8012/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8013///
8014/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_round_ps&expand=3938)
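///
/// A minimal usage sketch of the writemask form (marked `ignore`: it assumes an
/// AVX-512 CPU and the unstable `stdarch_x86_avx512` feature); the values are
/// illustrative only:
///
/// ```ignore
/// # use core::arch::x86_64::*;
/// let src = _mm512_set1_ps(-1.0);
/// let a = _mm512_set1_ps(2.0);
/// let b = _mm512_set1_ps(3.0);
/// // Even lanes get 6.0; odd lanes keep -1.0 from `src`.
/// let r = _mm512_mask_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
///     src,
///     0b01010101_01010101,
///     a,
///     b,
/// );
/// ```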
8015#[inline]
8016#[target_feature(enable = "avx512f")]
8017#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8018#[cfg_attr(test, assert_instr(vmulps, ROUNDING = 8))]
8019#[rustc_legacy_const_generics(4)]
8020pub fn _mm512_mask_mul_round_ps<const ROUNDING: i32>(
8021 src: __m512,
8022 k: __mmask16,
8023 a: __m512,
8024 b: __m512,
8025) -> __m512 {
8026 unsafe {
8027 static_assert_rounding!(ROUNDING);
8028 let a: f32x16 = a.as_f32x16();
8029 let b: f32x16 = b.as_f32x16();
8030 let r: f32x16 = vmulps(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
8032 }
8033}
8034
8035/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8036///
8037/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8038/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8039/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8040/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8041/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8042/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8043///
8044/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_round_ps&expand=3939)
8045#[inline]
8046#[target_feature(enable = "avx512f")]
8047#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8048#[cfg_attr(test, assert_instr(vmulps, ROUNDING = 8))]
8049#[rustc_legacy_const_generics(3)]
8050pub fn _mm512_maskz_mul_round_ps<const ROUNDING: i32>(
8051 k: __mmask16,
8052 a: __m512,
8053 b: __m512,
8054) -> __m512 {
8055 unsafe {
8056 static_assert_rounding!(ROUNDING);
8057 let a: f32x16 = a.as_f32x16();
8058 let b: f32x16 = b.as_f32x16();
8059 let r: f32x16 = vmulps(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
8061 }
8062}
8063
8064/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.\
8065///
8066/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8067/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8068/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8069/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8070/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8071/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8072///
8073/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_round_pd&expand=3937)
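///
/// A minimal usage sketch (marked `ignore`: it assumes an AVX-512 CPU and the
/// unstable `stdarch_x86_avx512` feature); the values are illustrative only:
///
/// ```ignore
/// # use core::arch::x86_64::*;
/// let a = _mm512_set1_pd(1.25);
/// let b = _mm512_set1_pd(4.0);
/// // Every lane becomes 5.0.
/// let r = _mm512_mul_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
/// ```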
8074#[inline]
8075#[target_feature(enable = "avx512f")]
8076#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8077#[cfg_attr(test, assert_instr(vmulpd, ROUNDING = 8))]
8078#[rustc_legacy_const_generics(2)]
8079pub fn _mm512_mul_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
8080 unsafe {
8081 static_assert_rounding!(ROUNDING);
8082 let a: f64x8 = a.as_f64x8();
8083 let b: f64x8 = b.as_f64x8();
8084 let r: f64x8 = vmulpd(a, b, ROUNDING);
        transmute(r)
8086 }
8087}
8088
8089/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8090///
8091/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8092/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8093/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8094/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8095/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8096/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8097///
8098/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_round_pd&expand=3935)
8099#[inline]
8100#[target_feature(enable = "avx512f")]
8101#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8102#[cfg_attr(test, assert_instr(vmulpd, ROUNDING = 8))]
8103#[rustc_legacy_const_generics(4)]
8104pub fn _mm512_mask_mul_round_pd<const ROUNDING: i32>(
8105 src: __m512d,
8106 k: __mmask8,
8107 a: __m512d,
8108 b: __m512d,
8109) -> __m512d {
8110 unsafe {
8111 static_assert_rounding!(ROUNDING);
8112 let a: f64x8 = a.as_f64x8();
8113 let b: f64x8 = b.as_f64x8();
8114 let r: f64x8 = vmulpd(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
8116 }
8117}
8118
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8120///
8121/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8122/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8123/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8124/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8125/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8126/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8127///
8128/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_round_pd&expand=3939)
8129#[inline]
8130#[target_feature(enable = "avx512f")]
8131#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8132#[cfg_attr(test, assert_instr(vmulpd, ROUNDING = 8))]
8133#[rustc_legacy_const_generics(3)]
8134pub fn _mm512_maskz_mul_round_pd<const ROUNDING: i32>(
8135 k: __mmask8,
8136 a: __m512d,
8137 b: __m512d,
8138) -> __m512d {
8139 unsafe {
8140 static_assert_rounding!(ROUNDING);
8141 let a: f64x8 = a.as_f64x8();
8142 let b: f64x8 = b.as_f64x8();
8143 let r: f64x8 = vmulpd(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
8145 }
8146}
8147
8148/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst.\
8149///
8150/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8151/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8152/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8153/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8154/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8155/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8156///
8157/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_round_ps&expand=2168)
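///
/// A minimal usage sketch (marked `ignore`: it assumes an AVX-512 CPU and the
/// unstable `stdarch_x86_avx512` feature); the values are illustrative only:
///
/// ```ignore
/// # use core::arch::x86_64::*;
/// let a = _mm512_set1_ps(1.0);
/// let b = _mm512_set1_ps(3.0);
/// // Every lane holds 1.0 / 3.0 with the quotient rounded toward negative infinity.
/// let r = _mm512_div_round_ps::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a, b);
/// ```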
8158#[inline]
8159#[target_feature(enable = "avx512f")]
8160#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8161#[cfg_attr(test, assert_instr(vdivps, ROUNDING = 8))]
8162#[rustc_legacy_const_generics(2)]
8163pub fn _mm512_div_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
8164 unsafe {
8165 static_assert_rounding!(ROUNDING);
8166 let a: f32x16 = a.as_f32x16();
8167 let b: f32x16 = b.as_f32x16();
8168 let r: f32x16 = vdivps(a, b, ROUNDING);
        transmute(r)
8170 }
8171}
8172
8173/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8174///
8175/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8176/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8177/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8178/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8179/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8180/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8181///
8182/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_round_ps&expand=2169)
8183#[inline]
8184#[target_feature(enable = "avx512f")]
8185#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8186#[cfg_attr(test, assert_instr(vdivps, ROUNDING = 8))]
8187#[rustc_legacy_const_generics(4)]
8188pub fn _mm512_mask_div_round_ps<const ROUNDING: i32>(
8189 src: __m512,
8190 k: __mmask16,
8191 a: __m512,
8192 b: __m512,
8193) -> __m512 {
8194 unsafe {
8195 static_assert_rounding!(ROUNDING);
8196 let a: f32x16 = a.as_f32x16();
8197 let b: f32x16 = b.as_f32x16();
8198 let r: f32x16 = vdivps(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
8200 }
8201}
8202
8203/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8204///
8205/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8206/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8207/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8208/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8209/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8210/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8211///
8212/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_round_ps&expand=2170)
8213#[inline]
8214#[target_feature(enable = "avx512f")]
8215#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8216#[cfg_attr(test, assert_instr(vdivps, ROUNDING = 8))]
8217#[rustc_legacy_const_generics(3)]
8218pub fn _mm512_maskz_div_round_ps<const ROUNDING: i32>(
8219 k: __mmask16,
8220 a: __m512,
8221 b: __m512,
8222) -> __m512 {
8223 unsafe {
8224 static_assert_rounding!(ROUNDING);
8225 let a: f32x16 = a.as_f32x16();
8226 let b: f32x16 = b.as_f32x16();
8227 let r: f32x16 = vdivps(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
8229 }
8230}
8231
/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst.\
8233///
8234/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8235/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8236/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8237/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8238/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8239/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8240///
8241/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_round_pd&expand=2165)
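///
/// A minimal usage sketch (marked `ignore`: it assumes an AVX-512 CPU and the
/// unstable `stdarch_x86_avx512` feature); the values are illustrative only:
///
/// ```ignore
/// # use core::arch::x86_64::*;
/// let a = _mm512_set1_pd(7.0);
/// let b = _mm512_set1_pd(2.0);
/// // Every lane becomes 3.5; the rounding mode only matters for inexact quotients.
/// let r = _mm512_div_round_pd::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a, b);
/// ```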
8242#[inline]
8243#[target_feature(enable = "avx512f")]
8244#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8245#[cfg_attr(test, assert_instr(vdivpd, ROUNDING = 8))]
8246#[rustc_legacy_const_generics(2)]
8247pub fn _mm512_div_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
8248 unsafe {
8249 static_assert_rounding!(ROUNDING);
8250 let a: f64x8 = a.as_f64x8();
8251 let b: f64x8 = b.as_f64x8();
8252 let r: f64x8 = vdivpd(a, b, ROUNDING);
        transmute(r)
8254 }
8255}
8256
8257/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8258///
8259/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8260/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8261/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8262/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8263/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8264/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8265///
8266/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_round_pd&expand=2166)
8267#[inline]
8268#[target_feature(enable = "avx512f")]
8269#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8270#[cfg_attr(test, assert_instr(vdivpd, ROUNDING = 8))]
8271#[rustc_legacy_const_generics(4)]
8272pub fn _mm512_mask_div_round_pd<const ROUNDING: i32>(
8273 src: __m512d,
8274 k: __mmask8,
8275 a: __m512d,
8276 b: __m512d,
8277) -> __m512d {
8278 unsafe {
8279 static_assert_rounding!(ROUNDING);
8280 let a: f64x8 = a.as_f64x8();
8281 let b: f64x8 = b.as_f64x8();
8282 let r: f64x8 = vdivpd(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
8284 }
8285}
8286
8287/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8288///
8289/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8290/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8291/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8292/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8293/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8294/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8295///
8296/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_round_pd&expand=2167)
8297#[inline]
8298#[target_feature(enable = "avx512f")]
8299#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8300#[cfg_attr(test, assert_instr(vdivpd, ROUNDING = 8))]
8301#[rustc_legacy_const_generics(3)]
8302pub fn _mm512_maskz_div_round_pd<const ROUNDING: i32>(
8303 k: __mmask8,
8304 a: __m512d,
8305 b: __m512d,
8306) -> __m512d {
8307 unsafe {
8308 static_assert_rounding!(ROUNDING);
8309 let a: f64x8 = a.as_f64x8();
8310 let b: f64x8 = b.as_f64x8();
8311 let r: f64x8 = vdivpd(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
8313 }
8314}
8315
8316/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst.\
8317///
8318/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8319/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8320/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8321/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8322/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8323/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8324///
8325/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_round_ps&expand=5377)
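///
/// A minimal usage sketch (marked `ignore`: it assumes an AVX-512 CPU and the
/// unstable `stdarch_x86_avx512` feature); the values are illustrative only:
///
/// ```ignore
/// # use core::arch::x86_64::*;
/// let a = _mm512_set1_ps(2.0);
/// // Every lane holds the square root of 2.0, rounded to the nearest `f32`.
/// let r = _mm512_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
/// ```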
8326#[inline]
8327#[target_feature(enable = "avx512f")]
8328#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8329#[cfg_attr(test, assert_instr(vsqrtps, ROUNDING = 8))]
8330#[rustc_legacy_const_generics(1)]
8331pub fn _mm512_sqrt_round_ps<const ROUNDING: i32>(a: __m512) -> __m512 {
8332 unsafe {
8333 static_assert_rounding!(ROUNDING);
8334 let a: f32x16 = a.as_f32x16();
8335 let r: f32x16 = vsqrtps(a, ROUNDING);
        transmute(r)
8337 }
8338}
8339
8340/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8341///
8342/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8343/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8344/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8345/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8346/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8347/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8348///
8349/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_round_ps&expand=5375)
8350#[inline]
8351#[target_feature(enable = "avx512f")]
8352#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8353#[cfg_attr(test, assert_instr(vsqrtps, ROUNDING = 8))]
8354#[rustc_legacy_const_generics(3)]
8355pub fn _mm512_mask_sqrt_round_ps<const ROUNDING: i32>(
8356 src: __m512,
8357 k: __mmask16,
8358 a: __m512,
8359) -> __m512 {
8360 unsafe {
8361 static_assert_rounding!(ROUNDING);
8362 let a: f32x16 = a.as_f32x16();
8363 let r: f32x16 = vsqrtps(a, ROUNDING);
        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
8365 }
8366}
8367
8368/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8369///
8370/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8371/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8372/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8373/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8374/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8375/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8376///
8377/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_round_ps&expand=5376)
8378#[inline]
8379#[target_feature(enable = "avx512f")]
8380#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8381#[cfg_attr(test, assert_instr(vsqrtps, ROUNDING = 8))]
8382#[rustc_legacy_const_generics(2)]
8383pub fn _mm512_maskz_sqrt_round_ps<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m512 {
8384 unsafe {
8385 static_assert_rounding!(ROUNDING);
8386 let a: f32x16 = a.as_f32x16();
8387 let r: f32x16 = vsqrtps(a, ROUNDING);
        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
8389 }
8390}
8391
8392/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst.\
8393///
8394/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8395/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8396/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8397/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8398/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8399/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8400///
8401/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_round_pd&expand=5374)
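///
/// A minimal usage sketch (marked `ignore`: it assumes an AVX-512 CPU and the
/// unstable `stdarch_x86_avx512` feature); the values are illustrative only:
///
/// ```ignore
/// # use core::arch::x86_64::*;
/// let a = _mm512_set1_pd(9.0);
/// // Uses the rounding mode currently selected in MXCSR.RC; every lane becomes 3.0.
/// let r = _mm512_sqrt_round_pd::<_MM_FROUND_CUR_DIRECTION>(a);
/// ```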
8402#[inline]
8403#[target_feature(enable = "avx512f")]
8404#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8405#[cfg_attr(test, assert_instr(vsqrtpd, ROUNDING = 8))]
8406#[rustc_legacy_const_generics(1)]
8407pub fn _mm512_sqrt_round_pd<const ROUNDING: i32>(a: __m512d) -> __m512d {
8408 unsafe {
8409 static_assert_rounding!(ROUNDING);
8410 let a: f64x8 = a.as_f64x8();
8411 let r: f64x8 = vsqrtpd(a, ROUNDING);
        transmute(r)
8413 }
8414}
8415
8416/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8417///
8418/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8419/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8420/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8421/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8422/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8423/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8424///
8425/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_round_pd&expand=5372)
8426#[inline]
8427#[target_feature(enable = "avx512f")]
8428#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8429#[cfg_attr(test, assert_instr(vsqrtpd, ROUNDING = 8))]
8430#[rustc_legacy_const_generics(3)]
8431pub fn _mm512_mask_sqrt_round_pd<const ROUNDING: i32>(
8432 src: __m512d,
8433 k: __mmask8,
8434 a: __m512d,
8435) -> __m512d {
8436 unsafe {
8437 static_assert_rounding!(ROUNDING);
8438 let a: f64x8 = a.as_f64x8();
8439 let r: f64x8 = vsqrtpd(a, ROUNDING);
        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
8441 }
8442}
8443
8444/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8445///
8446/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8447/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8448/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8449/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8450/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8451/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8452///
8453/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_round_pd&expand=5373)
8454#[inline]
8455#[target_feature(enable = "avx512f")]
8456#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8457#[cfg_attr(test, assert_instr(vsqrtpd, ROUNDING = 8))]
8458#[rustc_legacy_const_generics(2)]
8459pub fn _mm512_maskz_sqrt_round_pd<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m512d {
8460 unsafe {
8461 static_assert_rounding!(ROUNDING);
8462 let a: f64x8 = a.as_f64x8();
8463 let r: f64x8 = vsqrtpd(a, ROUNDING);
        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
8465 }
8466}
8467
8468/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.\
8469///
8470/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8471/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8472/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8473/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8474/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8475/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8476///
8477/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_round_ps&expand=2565)
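///
/// A minimal usage sketch (marked `ignore`: it assumes an AVX-512 CPU and the
/// unstable `stdarch_x86_avx512` feature); the values are illustrative only:
///
/// ```ignore
/// # use core::arch::x86_64::*;
/// let a = _mm512_set1_ps(2.0);
/// let b = _mm512_set1_ps(3.0);
/// let c = _mm512_set1_ps(1.0);
/// // Every lane becomes 2.0 * 3.0 + 1.0 = 7.0, with a single rounding at the end.
/// let r = _mm512_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
/// ```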
8478#[inline]
8479#[target_feature(enable = "avx512f")]
8480#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8481#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
8482#[rustc_legacy_const_generics(3)]
8483pub fn _mm512_fmadd_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
8484 unsafe {
8485 static_assert_rounding!(ROUNDING);
8486 vfmadd132psround(a, b, c, ROUNDING)
8487 }
8488}
8489
8490/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8491///
8492/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8493/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8494/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8495/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8496/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8497/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8498///
8499/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_round_ps&expand=2566)
8500#[inline]
8501#[target_feature(enable = "avx512f")]
8502#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8503#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
8504#[rustc_legacy_const_generics(4)]
8505pub fn _mm512_mask_fmadd_round_ps<const ROUNDING: i32>(
8506 a: __m512,
8507 k: __mmask16,
8508 b: __m512,
8509 c: __m512,
8510) -> __m512 {
8511 unsafe {
8512 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmadd132psround(a, b, c, ROUNDING), a)
8514 }
8515}
8516
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8518///
8519/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8520/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8521/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8522/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8523/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8524/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8525///
8526/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_round_ps&expand=2568)
8527#[inline]
8528#[target_feature(enable = "avx512f")]
8529#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8530#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
8531#[rustc_legacy_const_generics(4)]
8532pub fn _mm512_maskz_fmadd_round_ps<const ROUNDING: i32>(
8533 k: __mmask16,
8534 a: __m512,
8535 b: __m512,
8536 c: __m512,
8537) -> __m512 {
8538 unsafe {
8539 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmadd132psround(a, b, c, ROUNDING), _mm512_setzero_ps())
8541 }
8542}
8543
8544/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
8545///
8546/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8547/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8548/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8549/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8550/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8551/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8552///
8553/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_round_ps&expand=2567)
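///
/// A minimal usage sketch of the mask3 form (marked `ignore`: it assumes an
/// AVX-512 CPU and the unstable `stdarch_x86_avx512` feature); the values are
/// illustrative only:
///
/// ```ignore
/// # use core::arch::x86_64::*;
/// let a = _mm512_set1_ps(2.0);
/// let b = _mm512_set1_ps(3.0);
/// let c = _mm512_set1_ps(1.0);
/// // Lanes 0-3 get 7.0; lanes 4-15 keep the values from `c` (1.0).
/// let r = _mm512_mask3_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
///     a,
///     b,
///     c,
///     0b00000000_00001111,
/// );
/// ```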
8554#[inline]
8555#[target_feature(enable = "avx512f")]
8556#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8557#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
8558#[rustc_legacy_const_generics(4)]
8559pub fn _mm512_mask3_fmadd_round_ps<const ROUNDING: i32>(
8560 a: __m512,
8561 b: __m512,
8562 c: __m512,
8563 k: __mmask16,
8564) -> __m512 {
8565 unsafe {
8566 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmadd132psround(a, b, c, ROUNDING), c)
8568 }
8569}
8570
8571/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.\
8572///
8573/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8574/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8575/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8576/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8577/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8578/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8579///
8580/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_round_pd&expand=2561)
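///
/// A minimal usage sketch (marked `ignore`: it assumes an AVX-512 CPU and the
/// unstable `stdarch_x86_avx512` feature); the values are illustrative only:
///
/// ```ignore
/// # use core::arch::x86_64::*;
/// let a = _mm512_set1_pd(2.0);
/// let b = _mm512_set1_pd(3.0);
/// let c = _mm512_set1_pd(1.0);
/// // Every lane becomes 7.0.
/// let r = _mm512_fmadd_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
/// ```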
8581#[inline]
8582#[target_feature(enable = "avx512f")]
8583#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8584#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
8585#[rustc_legacy_const_generics(3)]
8586pub fn _mm512_fmadd_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
8587 unsafe {
8588 static_assert_rounding!(ROUNDING);
8589 vfmadd132pdround(a, b, c, ROUNDING)
8590 }
8591}
8592
8593/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8594///
8595/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8596/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8597/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8598/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8599/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8600/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8601///
8602/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_round_pd&expand=2562)
8603#[inline]
8604#[target_feature(enable = "avx512f")]
8605#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8606#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
8607#[rustc_legacy_const_generics(4)]
8608pub fn _mm512_mask_fmadd_round_pd<const ROUNDING: i32>(
8609 a: __m512d,
8610 k: __mmask8,
8611 b: __m512d,
8612 c: __m512d,
8613) -> __m512d {
8614 unsafe {
8615 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmadd132pdround(a, b, c, ROUNDING), a)
8617 }
8618}
8619
8620/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8621///
8622/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8623/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8624/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8625/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8626/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8627/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8628///
8629/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_round_pd&expand=2564)
8630#[inline]
8631#[target_feature(enable = "avx512f")]
8632#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8633#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
8634#[rustc_legacy_const_generics(4)]
8635pub fn _mm512_maskz_fmadd_round_pd<const ROUNDING: i32>(
8636 k: __mmask8,
8637 a: __m512d,
8638 b: __m512d,
8639 c: __m512d,
8640) -> __m512d {
8641 unsafe {
8642 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmadd132pdround(a, b, c, ROUNDING), _mm512_setzero_pd())
8644 }
8645}
8646
8647/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
8648///
8649/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8650/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8651/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8652/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8653/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8654/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8655///
8656/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_round_pd&expand=2563)
8657#[inline]
8658#[target_feature(enable = "avx512f")]
8659#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8660#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
8661#[rustc_legacy_const_generics(4)]
8662pub fn _mm512_mask3_fmadd_round_pd<const ROUNDING: i32>(
8663 a: __m512d,
8664 b: __m512d,
8665 c: __m512d,
8666 k: __mmask8,
8667) -> __m512d {
8668 unsafe {
8669 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmadd132pdround(a, b, c, ROUNDING), c)
8671 }
8672}
8673
8674/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.\
8675///
8676/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8677/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8678/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8679/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8680/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8681/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8682///
8683/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_round_ps&expand=2651)
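///
/// A minimal usage sketch (marked `ignore`: it assumes an AVX-512 CPU and the
/// unstable `stdarch_x86_avx512` feature); the values are illustrative only:
///
/// ```ignore
/// # use core::arch::x86_64::*;
/// let a = _mm512_set1_ps(2.0);
/// let b = _mm512_set1_ps(3.0);
/// let c = _mm512_set1_ps(1.0);
/// // Every lane becomes 2.0 * 3.0 - 1.0 = 5.0.
/// let r = _mm512_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
/// ```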
8684#[inline]
8685#[target_feature(enable = "avx512f")]
8686#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8687#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
8688#[rustc_legacy_const_generics(3)]
8689pub fn _mm512_fmsub_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
8690 unsafe {
8691 static_assert_rounding!(ROUNDING);
        vfmadd132psround(a, b, simd_neg(c), ROUNDING)
8693 }
8694}
8695
8696/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8697///
8698/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8699/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8700/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8701/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8702/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8703/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8704///
8705/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_round_ps&expand=2652)
8706#[inline]
8707#[target_feature(enable = "avx512f")]
8708#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8709#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
8710#[rustc_legacy_const_generics(4)]
8711pub fn _mm512_mask_fmsub_round_ps<const ROUNDING: i32>(
8712 a: __m512,
8713 k: __mmask16,
8714 b: __m512,
8715 c: __m512,
8716) -> __m512 {
8717 unsafe {
8718 static_assert_rounding!(ROUNDING);
        let r: __m512 = vfmadd132psround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, a)
8721 }
8722}
8723
8724/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8725///
8726/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8727/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8728/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8729/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8730/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8731/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8732///
8733/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_round_ps&expand=2654)
8734#[inline]
8735#[target_feature(enable = "avx512f")]
8736#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8737#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
8738#[rustc_legacy_const_generics(4)]
8739pub fn _mm512_maskz_fmsub_round_ps<const ROUNDING: i32>(
8740 k: __mmask16,
8741 a: __m512,
8742 b: __m512,
8743 c: __m512,
8744) -> __m512 {
8745 unsafe {
8746 static_assert_rounding!(ROUNDING);
        let r: __m512 = vfmadd132psround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, _mm512_setzero_ps())
8749 }
8750}
8751
8752/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
8753///
8754/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8755/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8756/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8757/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8758/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8759/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8760///
8761/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_round_ps&expand=2653)
8762#[inline]
8763#[target_feature(enable = "avx512f")]
8764#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8765#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
8766#[rustc_legacy_const_generics(4)]
8767pub fn _mm512_mask3_fmsub_round_ps<const ROUNDING: i32>(
8768 a: __m512,
8769 b: __m512,
8770 c: __m512,
8771 k: __mmask16,
8772) -> __m512 {
8773 unsafe {
8774 static_assert_rounding!(ROUNDING);
        let r: __m512 = vfmadd132psround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, c)
8777 }
8778}
8779
8780/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.\
8781///
8782/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8783/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8784/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8785/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8786/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8787/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8788///
8789/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_round_pd&expand=2647)
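///
/// A minimal usage sketch (marked `ignore`: it assumes an AVX-512 CPU and the
/// unstable `stdarch_x86_avx512` feature); the values are illustrative only:
///
/// ```ignore
/// # use core::arch::x86_64::*;
/// let a = _mm512_set1_pd(2.0);
/// let b = _mm512_set1_pd(3.0);
/// let c = _mm512_set1_pd(1.0);
/// // Every lane becomes 5.0.
/// let r = _mm512_fmsub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
/// ```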
8790#[inline]
8791#[target_feature(enable = "avx512f")]
8792#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8793#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
8794#[rustc_legacy_const_generics(3)]
8795pub fn _mm512_fmsub_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
8796 unsafe {
8797 static_assert_rounding!(ROUNDING);
        vfmadd132pdround(a, b, simd_neg(c), ROUNDING)
8799 }
8800}
8801
8802/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8803///
8804/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8805/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8806/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8807/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8808/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8809/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8810///
8811/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_round_pd&expand=2648)
8812#[inline]
8813#[target_feature(enable = "avx512f")]
8814#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8815#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
8816#[rustc_legacy_const_generics(4)]
8817pub fn _mm512_mask_fmsub_round_pd<const ROUNDING: i32>(
8818 a: __m512d,
8819 k: __mmask8,
8820 b: __m512d,
8821 c: __m512d,
8822) -> __m512d {
8823 unsafe {
8824 static_assert_rounding!(ROUNDING);
        let r: __m512d = vfmadd132pdround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, a)
8827 }
8828}
8829
8830/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8831///
8832/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8833/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8834/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8835/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8836/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8837/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8838///
8839/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_round_pd&expand=2650)
8840#[inline]
8841#[target_feature(enable = "avx512f")]
8842#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8843#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
8844#[rustc_legacy_const_generics(4)]
8845pub fn _mm512_maskz_fmsub_round_pd<const ROUNDING: i32>(
8846 k: __mmask8,
8847 a: __m512d,
8848 b: __m512d,
8849 c: __m512d,
8850) -> __m512d {
8851 unsafe {
8852 static_assert_rounding!(ROUNDING);
        let r: __m512d = vfmadd132pdround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, _mm512_setzero_pd())
8855 }
8856}
8857
8858/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
8859///
8860/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8861/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8862/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8863/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8864/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8865/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8866///
8867/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_round_pd&expand=2649)
8868#[inline]
8869#[target_feature(enable = "avx512f")]
8870#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8871#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
8872#[rustc_legacy_const_generics(4)]
8873pub fn _mm512_mask3_fmsub_round_pd<const ROUNDING: i32>(
8874 a: __m512d,
8875 b: __m512d,
8876 c: __m512d,
8877 k: __mmask8,
8878) -> __m512d {
8879 unsafe {
8880 static_assert_rounding!(ROUNDING);
        let r: __m512d = vfmadd132pdround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, c)
8883 }
8884}
8885
8886/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.\
8887///
8888/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8889/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8890/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8891/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8892/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8893/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8894///
8895/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_round_ps&expand=2619)
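///
/// A minimal usage sketch (marked `ignore`: it assumes an AVX-512 CPU and the
/// unstable `stdarch_x86_avx512` feature); the values are illustrative only:
///
/// ```ignore
/// # use core::arch::x86_64::*;
/// let a = _mm512_set1_ps(2.0);
/// let b = _mm512_set1_ps(3.0);
/// let c = _mm512_set1_ps(1.0);
/// // Even-indexed lanes get 6.0 - 1.0 = 5.0; odd-indexed lanes get 6.0 + 1.0 = 7.0.
/// let r = _mm512_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
/// ```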
8896#[inline]
8897#[target_feature(enable = "avx512f")]
8898#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8899#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
8900#[rustc_legacy_const_generics(3)]
8901pub fn _mm512_fmaddsub_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
8902 unsafe {
8903 static_assert_rounding!(ROUNDING);
8904 vfmaddsubpsround(a, b, c, ROUNDING)
8905 }
8906}
8907
8908/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8909///
8910/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8911/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8912/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8913/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8914/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8915/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8916///
8917/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_round_ps&expand=2620)
8918#[inline]
8919#[target_feature(enable = "avx512f")]
8920#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8921#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
8922#[rustc_legacy_const_generics(4)]
8923pub fn _mm512_mask_fmaddsub_round_ps<const ROUNDING: i32>(
8924 a: __m512,
8925 k: __mmask16,
8926 b: __m512,
8927 c: __m512,
8928) -> __m512 {
8929 unsafe {
8930 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmaddsubpsround(a, b, c, ROUNDING), a)
8932 }
8933}
8934
8935/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8936///
8937/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8938/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8939/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8940/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8941/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8942/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8943///
8944/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_round_ps&expand=2622)
8945#[inline]
8946#[target_feature(enable = "avx512f")]
8947#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8948#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
8949#[rustc_legacy_const_generics(4)]
8950pub fn _mm512_maskz_fmaddsub_round_ps<const ROUNDING: i32>(
8951 k: __mmask16,
8952 a: __m512,
8953 b: __m512,
8954 c: __m512,
8955) -> __m512 {
8956 unsafe {
8957 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmaddsubpsround(a, b, c, ROUNDING), _mm512_setzero_ps())
8959 }
8960}
8961
8962/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
8963///
8964/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8965/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8966/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8967/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8968/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8969/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8970///
8971/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_round_ps&expand=2621)
8972#[inline]
8973#[target_feature(enable = "avx512f")]
8974#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8975#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
8976#[rustc_legacy_const_generics(4)]
8977pub fn _mm512_mask3_fmaddsub_round_ps<const ROUNDING: i32>(
8978 a: __m512,
8979 b: __m512,
8980 c: __m512,
8981 k: __mmask16,
8982) -> __m512 {
8983 unsafe {
8984 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmaddsubpsround(a, b, c, ROUNDING), c)
8986 }
8987}
8988
8989/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.\
8990///
8991/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8992/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8993/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8994/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8995/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8996/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8997///
8998/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_round_pd&expand=2615)
8999#[inline]
9000#[target_feature(enable = "avx512f")]
9001#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9002#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
9003#[rustc_legacy_const_generics(3)]
9004pub fn _mm512_fmaddsub_round_pd<const ROUNDING: i32>(
9005 a: __m512d,
9006 b: __m512d,
9007 c: __m512d,
9008) -> __m512d {
9009 unsafe {
9010 static_assert_rounding!(ROUNDING);
9011 vfmaddsubpdround(a, b, c, ROUNDING)
9012 }
9013}
9014
9015/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9016///
9017/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9018/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9019/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9020/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9021/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9022/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9023///
9024/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_round_pd&expand=2616)
9025#[inline]
9026#[target_feature(enable = "avx512f")]
9027#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9028#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
9029#[rustc_legacy_const_generics(4)]
9030pub fn _mm512_mask_fmaddsub_round_pd<const ROUNDING: i32>(
9031 a: __m512d,
9032 k: __mmask8,
9033 b: __m512d,
9034 c: __m512d,
9035) -> __m512d {
9036 unsafe {
9037 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmaddsubpdround(a, b, c, ROUNDING), a)
9039 }
9040}
9041
9042/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9043///
9044/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9045/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9046/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9047/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9048/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9049/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9050///
9051/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_round_pd&expand=2618)
9052#[inline]
9053#[target_feature(enable = "avx512f")]
9054#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9055#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
9056#[rustc_legacy_const_generics(4)]
9057pub fn _mm512_maskz_fmaddsub_round_pd<const ROUNDING: i32>(
9058 k: __mmask8,
9059 a: __m512d,
9060 b: __m512d,
9061 c: __m512d,
9062) -> __m512d {
9063 unsafe {
9064 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmaddsubpdround(a, b, c, ROUNDING), _mm512_setzero_pd())
9066 }
9067}
9068
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9070///
9071/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9072/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9073/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9074/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9075/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9076/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9077///
9078/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_round_pd&expand=2617)
9079#[inline]
9080#[target_feature(enable = "avx512f")]
9081#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9082#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
9083#[rustc_legacy_const_generics(4)]
9084pub fn _mm512_mask3_fmaddsub_round_pd<const ROUNDING: i32>(
9085 a: __m512d,
9086 b: __m512d,
9087 c: __m512d,
9088 k: __mmask8,
9089) -> __m512d {
9090 unsafe {
9091 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmaddsubpdround(a, b, c, ROUNDING), c)
9093 }
9094}
9095
9096/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.\
9097///
9098/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9099/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9100/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9101/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9102/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9103/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9104///
9105/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_round_ps&expand=2699)
9106#[inline]
9107#[target_feature(enable = "avx512f")]
9108#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9109#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
9110#[rustc_legacy_const_generics(3)]
9111pub fn _mm512_fmsubadd_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
9112 unsafe {
9113 static_assert_rounding!(ROUNDING);
        vfmaddsubpsround(a, b, simd_neg(c), ROUNDING)
9115 }
9116}
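// A minimal scalar sketch (hypothetical helper, not part of this module's API)
// of the subadd pattern computed by `_mm512_fmsubadd_round_ps`, the mirror
// image of fmaddsub: even-indexed lanes compute `a * b + c`, odd-indexed lanes
// compute `a * b - c`. ROUNDING and the fused single rounding are ignored.
#[cfg(test)]
fn fmsubadd_reference(a: &[f32; 16], b: &[f32; 16], c: &[f32; 16]) -> [f32; 16] {
    let mut dst = [0.0f32; 16];
    for i in 0..16 {
        let prod = a[i] * b[i];
        dst[i] = if i % 2 == 0 { prod + c[i] } else { prod - c[i] };
    }
    dst
}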
9117
9118/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9119///
9120/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9121/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9122/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9123/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9124/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9125/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9126///
9127/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_round_ps&expand=2700)
9128#[inline]
9129#[target_feature(enable = "avx512f")]
9130#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9131#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
9132#[rustc_legacy_const_generics(4)]
9133pub fn _mm512_mask_fmsubadd_round_ps<const ROUNDING: i32>(
9134 a: __m512,
9135 k: __mmask16,
9136 b: __m512,
9137 c: __m512,
9138) -> __m512 {
9139 unsafe {
9140 static_assert_rounding!(ROUNDING);
        let r: __m512 = vfmaddsubpsround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, a)
9143 }
9144}
9145
9146/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9147///
9148/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9149/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9150/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9151/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9152/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9153/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9154///
9155/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_round_ps&expand=2702)
9156#[inline]
9157#[target_feature(enable = "avx512f")]
9158#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9159#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
9160#[rustc_legacy_const_generics(4)]
9161pub fn _mm512_maskz_fmsubadd_round_ps<const ROUNDING: i32>(
9162 k: __mmask16,
9163 a: __m512,
9164 b: __m512,
9165 c: __m512,
9166) -> __m512 {
9167 unsafe {
9168 static_assert_rounding!(ROUNDING);
        let r: __m512 = vfmaddsubpsround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, _mm512_setzero_ps())
9171 }
9172}
9173
9174/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9175///
9176/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9177/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9178/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9179/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9180/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9181/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9182///
9183/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_round_ps&expand=2701)
9184#[inline]
9185#[target_feature(enable = "avx512f")]
9186#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9187#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
9188#[rustc_legacy_const_generics(4)]
9189pub fn _mm512_mask3_fmsubadd_round_ps<const ROUNDING: i32>(
9190 a: __m512,
9191 b: __m512,
9192 c: __m512,
9193 k: __mmask16,
9194) -> __m512 {
9195 unsafe {
9196 static_assert_rounding!(ROUNDING);
        let r: __m512 = vfmaddsubpsround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, c)
9199 }
9200}
9201
9202/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.\
9203///
9204/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9205/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9206/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9207/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9208/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9209/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9210///
9211/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_round_pd&expand=2695)
9212#[inline]
9213#[target_feature(enable = "avx512f")]
9214#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9215#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
9216#[rustc_legacy_const_generics(3)]
9217pub fn _mm512_fmsubadd_round_pd<const ROUNDING: i32>(
9218 a: __m512d,
9219 b: __m512d,
9220 c: __m512d,
9221) -> __m512d {
9222 unsafe {
9223 static_assert_rounding!(ROUNDING);
        vfmaddsubpdround(a, b, simd_neg(c), ROUNDING)
9225 }
9226}
9227
9228/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9229///
9230/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9231/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9232/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9233/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9234/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9235/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9236///
9237/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_round_pd&expand=2696)
9238#[inline]
9239#[target_feature(enable = "avx512f")]
9240#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9241#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
9242#[rustc_legacy_const_generics(4)]
9243pub fn _mm512_mask_fmsubadd_round_pd<const ROUNDING: i32>(
9244 a: __m512d,
9245 k: __mmask8,
9246 b: __m512d,
9247 c: __m512d,
9248) -> __m512d {
9249 unsafe {
9250 static_assert_rounding!(ROUNDING);
        let r: __m512d = vfmaddsubpdround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, a)
9253 }
9254}
9255
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9257///
9258/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9259/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9260/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9261/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9262/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9263/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9264///
9265/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_round_pd&expand=2698)
9266#[inline]
9267#[target_feature(enable = "avx512f")]
9268#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9269#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
9270#[rustc_legacy_const_generics(4)]
9271pub fn _mm512_maskz_fmsubadd_round_pd<const ROUNDING: i32>(
9272 k: __mmask8,
9273 a: __m512d,
9274 b: __m512d,
9275 c: __m512d,
9276) -> __m512d {
9277 unsafe {
9278 static_assert_rounding!(ROUNDING);
        let r: __m512d = vfmaddsubpdround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, _mm512_setzero_pd())
9281 }
9282}
9283
9284/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9285///
9286/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9287/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9288/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9289/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9290/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9291/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9292///
9293/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_round_pd&expand=2697)
9294#[inline]
9295#[target_feature(enable = "avx512f")]
9296#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9297#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
9298#[rustc_legacy_const_generics(4)]
9299pub fn _mm512_mask3_fmsubadd_round_pd<const ROUNDING: i32>(
9300 a: __m512d,
9301 b: __m512d,
9302 c: __m512d,
9303 k: __mmask8,
9304) -> __m512d {
9305 unsafe {
9306 static_assert_rounding!(ROUNDING);
        let r: __m512d = vfmaddsubpdround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, c)
9309 }
9310}
9311
9312/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.\
9313///
9314/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9315/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9316/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9317/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9318/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9319/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9320///
9321/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_round_ps&expand=2731)
9322#[inline]
9323#[target_feature(enable = "avx512f")]
9324#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9325#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
9326#[rustc_legacy_const_generics(3)]
9327pub fn _mm512_fnmadd_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
9328 unsafe {
9329 static_assert_rounding!(ROUNDING);
        vfmadd132psround(simd_neg(a), b, c, ROUNDING)
9331 }
9332}
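// Per-lane semantics of `_mm512_fnmadd_round_ps` as a minimal scalar sketch
// (hypothetical helper, ROUNDING ignored): `-(a * b) + c`. Negating `a` before
// the fused multiply-add, as the body above does, yields the same value.
#[cfg(test)]
fn fnmadd_reference(a: f32, b: f32, c: f32) -> f32 {
    -(a * b) + c
}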
9333
9334/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9335///
9336/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9337/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9338/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9339/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9340/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9341/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9342///
9343/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_round_ps&expand=2732)
9344#[inline]
9345#[target_feature(enable = "avx512f")]
9346#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9347#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
9348#[rustc_legacy_const_generics(4)]
9349pub fn _mm512_mask_fnmadd_round_ps<const ROUNDING: i32>(
9350 a: __m512,
9351 k: __mmask16,
9352 b: __m512,
9353 c: __m512,
9354) -> __m512 {
9355 unsafe {
9356 static_assert_rounding!(ROUNDING);
        let r: __m512 = vfmadd132psround(simd_neg(a), b, c, ROUNDING);
        simd_select_bitmask(k, r, a)
9359 }
9360}
9361
9362/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9363///
9364/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9365/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9366/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9367/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9368/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9369/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9370///
9371/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_round_ps&expand=2734)
9372#[inline]
9373#[target_feature(enable = "avx512f")]
9374#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9375#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
9376#[rustc_legacy_const_generics(4)]
9377pub fn _mm512_maskz_fnmadd_round_ps<const ROUNDING: i32>(
9378 k: __mmask16,
9379 a: __m512,
9380 b: __m512,
9381 c: __m512,
9382) -> __m512 {
9383 unsafe {
9384 static_assert_rounding!(ROUNDING);
        let r: __m512 = vfmadd132psround(simd_neg(a), b, c, ROUNDING);
        simd_select_bitmask(k, r, _mm512_setzero_ps())
9387 }
9388}
9389
9390/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9391///
9392/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9393/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9394/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9395/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9396/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9397/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9398///
9399/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_round_ps&expand=2733)
9400#[inline]
9401#[target_feature(enable = "avx512f")]
9402#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9403#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
9404#[rustc_legacy_const_generics(4)]
9405pub fn _mm512_mask3_fnmadd_round_ps<const ROUNDING: i32>(
9406 a: __m512,
9407 b: __m512,
9408 c: __m512,
9409 k: __mmask16,
9410) -> __m512 {
9411 unsafe {
9412 static_assert_rounding!(ROUNDING);
        let r: __m512 = vfmadd132psround(simd_neg(a), b, c, ROUNDING);
        simd_select_bitmask(k, r, c)
9415 }
9416}
9417
9418/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.\
9419///
9420/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9421/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9422/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9423/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9424/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9425/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9426///
9427/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_round_pd&expand=2711)
9428#[inline]
9429#[target_feature(enable = "avx512f")]
9430#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9431#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
9432#[rustc_legacy_const_generics(3)]
9433pub fn _mm512_fnmadd_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
9434 unsafe {
9435 static_assert_rounding!(ROUNDING);
        vfmadd132pdround(simd_neg(a), b, c, ROUNDING)
9437 }
9438}
9439
9440/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9441///
9442/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9443/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9444/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9445/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9446/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9447/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9448///
9449/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_round_pd&expand=2728)
9450#[inline]
9451#[target_feature(enable = "avx512f")]
9452#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9453#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
9454#[rustc_legacy_const_generics(4)]
9455pub fn _mm512_mask_fnmadd_round_pd<const ROUNDING: i32>(
9456 a: __m512d,
9457 k: __mmask8,
9458 b: __m512d,
9459 c: __m512d,
9460) -> __m512d {
9461 unsafe {
9462 static_assert_rounding!(ROUNDING);
        let r: __m512d = vfmadd132pdround(simd_neg(a), b, c, ROUNDING);
        simd_select_bitmask(k, r, a)
9465 }
9466}
9467
9468/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9469///
9470/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9471/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9472/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9473/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9474/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9475/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9476///
9477/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_round_pd&expand=2730)
9478#[inline]
9479#[target_feature(enable = "avx512f")]
9480#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9481#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
9482#[rustc_legacy_const_generics(4)]
9483pub fn _mm512_maskz_fnmadd_round_pd<const ROUNDING: i32>(
9484 k: __mmask8,
9485 a: __m512d,
9486 b: __m512d,
9487 c: __m512d,
9488) -> __m512d {
9489 unsafe {
9490 static_assert_rounding!(ROUNDING);
        let r: __m512d = vfmadd132pdround(simd_neg(a), b, c, ROUNDING);
        simd_select_bitmask(k, r, _mm512_setzero_pd())
9493 }
9494}
9495
9496/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9497///
9498/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9499/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9500/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9501/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9502/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9503/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9504///
9505/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_round_pd&expand=2729)
9506#[inline]
9507#[target_feature(enable = "avx512f")]
9508#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9509#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
9510#[rustc_legacy_const_generics(4)]
9511pub fn _mm512_mask3_fnmadd_round_pd<const ROUNDING: i32>(
9512 a: __m512d,
9513 b: __m512d,
9514 c: __m512d,
9515 k: __mmask8,
9516) -> __m512d {
9517 unsafe {
9518 static_assert_rounding!(ROUNDING);
        let r: __m512d = vfmadd132pdround(simd_neg(a), b, c, ROUNDING);
        simd_select_bitmask(k, r, c)
9521 }
9522}
9523
9524/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.\
9525///
9526/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9527/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9528/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9529/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9530/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9531/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9532///
9533/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_round_ps&expand=2779)
9534#[inline]
9535#[target_feature(enable = "avx512f")]
9536#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9537#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
9538#[rustc_legacy_const_generics(3)]
9539pub fn _mm512_fnmsub_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
9540 unsafe {
9541 static_assert_rounding!(ROUNDING);
        vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING)
9543 }
9544}
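// Per-lane semantics of `_mm512_fnmsub_round_ps` as a minimal scalar sketch
// (hypothetical helper, ROUNDING ignored): `-(a * b) - c`. The body above gets
// the same result by negating both `a` and `c` before the fused multiply-add.
#[cfg(test)]
fn fnmsub_reference(a: f32, b: f32, c: f32) -> f32 {
    -(a * b) - c
}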
9545
9546/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9547///
9548/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9549/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9550/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9551/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9552/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9553/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9554///
9555/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_round_ps&expand=2780)
9556#[inline]
9557#[target_feature(enable = "avx512f")]
9558#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9559#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
9560#[rustc_legacy_const_generics(4)]
9561pub fn _mm512_mask_fnmsub_round_ps<const ROUNDING: i32>(
9562 a: __m512,
9563 k: __mmask16,
9564 b: __m512,
9565 c: __m512,
9566) -> __m512 {
9567 unsafe {
9568 static_assert_rounding!(ROUNDING);
        let r: __m512 = vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, a)
9571 }
9572}
9573
9574/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9575///
9576/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9577/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9578/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9579/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9580/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9581/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9582///
9583/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_round_ps&expand=2782)
9584#[inline]
9585#[target_feature(enable = "avx512f")]
9586#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9587#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
9588#[rustc_legacy_const_generics(4)]
9589pub fn _mm512_maskz_fnmsub_round_ps<const ROUNDING: i32>(
9590 k: __mmask16,
9591 a: __m512,
9592 b: __m512,
9593 c: __m512,
9594) -> __m512 {
9595 unsafe {
9596 static_assert_rounding!(ROUNDING);
        let r: __m512 = vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, _mm512_setzero_ps())
9599 }
9600}
9601
9602/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9603///
9604/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9605/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9606/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9607/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9608/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9609/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9610///
9611/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_round_ps&expand=2781)
9612#[inline]
9613#[target_feature(enable = "avx512f")]
9614#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9615#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
9616#[rustc_legacy_const_generics(4)]
9617pub fn _mm512_mask3_fnmsub_round_ps<const ROUNDING: i32>(
9618 a: __m512,
9619 b: __m512,
9620 c: __m512,
9621 k: __mmask16,
9622) -> __m512 {
9623 unsafe {
9624 static_assert_rounding!(ROUNDING);
        let r: __m512 = vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, c)
9627 }
9628}
9629
9630/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.\
9631///
9632/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9633/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9634/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9635/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9636/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9637/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9638///
9639/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_round_pd&expand=2775)
9640#[inline]
9641#[target_feature(enable = "avx512f")]
9642#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9643#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
9644#[rustc_legacy_const_generics(3)]
9645pub fn _mm512_fnmsub_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
9646 unsafe {
9647 static_assert_rounding!(ROUNDING);
        vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING)
9649 }
9650}
9651
9652/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9653///
9654/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9655/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9656/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9657/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9658/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9659/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9660///
9661/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_round_pd&expand=2776)
9662#[inline]
9663#[target_feature(enable = "avx512f")]
9664#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9665#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
9666#[rustc_legacy_const_generics(4)]
9667pub fn _mm512_mask_fnmsub_round_pd<const ROUNDING: i32>(
9668 a: __m512d,
9669 k: __mmask8,
9670 b: __m512d,
9671 c: __m512d,
9672) -> __m512d {
9673 unsafe {
9674 static_assert_rounding!(ROUNDING);
        let r: __m512d = vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, a)
9677 }
9678}
9679
9680/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9681///
9682/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9683/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9684/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9685/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9686/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9687/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9688///
9689/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_round_pd&expand=2778)
9690#[inline]
9691#[target_feature(enable = "avx512f")]
9692#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9693#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
9694#[rustc_legacy_const_generics(4)]
9695pub fn _mm512_maskz_fnmsub_round_pd<const ROUNDING: i32>(
9696 k: __mmask8,
9697 a: __m512d,
9698 b: __m512d,
9699 c: __m512d,
9700) -> __m512d {
9701 unsafe {
9702 static_assert_rounding!(ROUNDING);
        let r: __m512d = vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, _mm512_setzero_pd())
9705 }
9706}
9707
9708/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9709///
9710/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9711/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9712/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9713/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9714/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9715/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9716///
9717/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_round_pd&expand=2777)
9718#[inline]
9719#[target_feature(enable = "avx512f")]
9720#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9721#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
9722#[rustc_legacy_const_generics(4)]
9723pub fn _mm512_mask3_fnmsub_round_pd<const ROUNDING: i32>(
9724 a: __m512d,
9725 b: __m512d,
9726 c: __m512d,
9727 k: __mmask8,
9728) -> __m512d {
9729 unsafe {
9730 static_assert_rounding!(ROUNDING);
        let r: __m512d = vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, c)
9733 }
9734}
9735
9736/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst.\
9737/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9738///
9739/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_round_ps&expand=3662)
9740#[inline]
9741#[target_feature(enable = "avx512f")]
9742#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9743#[cfg_attr(test, assert_instr(vmaxps, SAE = 8))]
9744#[rustc_legacy_const_generics(2)]
9745pub fn _mm512_max_round_ps<const SAE: i32>(a: __m512, b: __m512) -> __m512 {
9746 unsafe {
9747 static_assert_sae!(SAE);
9748 let a: f32x16 = a.as_f32x16();
9749 let b: f32x16 = b.as_f32x16();
9750 let r: f32x16 = vmaxps(a, b, SAE);
        transmute(r)
9752 }
9753}
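// Minimal usage sketch (hypothetical test helper, assuming an AVX-512F capable
// CPU): the SAE control is a const generic and must be either
// `_MM_FROUND_CUR_DIRECTION` or `_MM_FROUND_NO_EXC`; the latter suppresses
// floating-point exceptions during the comparison.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn max_round_ps_usage(a: __m512, b: __m512) -> __m512 {
    _mm512_max_round_ps::<_MM_FROUND_NO_EXC>(a, b)
}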
9754
9755/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
9756/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9757///
9758/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_round_ps&expand=3660)
9759#[inline]
9760#[target_feature(enable = "avx512f")]
9761#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9762#[cfg_attr(test, assert_instr(vmaxps, SAE = 8))]
9763#[rustc_legacy_const_generics(4)]
9764pub fn _mm512_mask_max_round_ps<const SAE: i32>(
9765 src: __m512,
9766 k: __mmask16,
9767 a: __m512,
9768 b: __m512,
9769) -> __m512 {
9770 unsafe {
9771 static_assert_sae!(SAE);
9772 let a: f32x16 = a.as_f32x16();
9773 let b: f32x16 = b.as_f32x16();
9774 let r: f32x16 = vmaxps(a, b, SAE);
        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
9776 }
9777}
9778
9779/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9780/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9781///
9782/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_round_ps&expand=3661)
9783#[inline]
9784#[target_feature(enable = "avx512f")]
9785#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9786#[cfg_attr(test, assert_instr(vmaxps, SAE = 8))]
9787#[rustc_legacy_const_generics(3)]
9788pub fn _mm512_maskz_max_round_ps<const SAE: i32>(k: __mmask16, a: __m512, b: __m512) -> __m512 {
9789 unsafe {
9790 static_assert_sae!(SAE);
9791 let a: f32x16 = a.as_f32x16();
9792 let b: f32x16 = b.as_f32x16();
9793 let r: f32x16 = vmaxps(a, b, SAE);
        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
9795 }
9796}
9797
9798/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst.\
9799/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9800///
9801/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_round_pd&expand=3659)
9802#[inline]
9803#[target_feature(enable = "avx512f")]
9804#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9805#[cfg_attr(test, assert_instr(vmaxpd, SAE = 8))]
9806#[rustc_legacy_const_generics(2)]
9807pub fn _mm512_max_round_pd<const SAE: i32>(a: __m512d, b: __m512d) -> __m512d {
9808 unsafe {
9809 static_assert_sae!(SAE);
9810 let a: f64x8 = a.as_f64x8();
9811 let b: f64x8 = b.as_f64x8();
9812 let r: f64x8 = vmaxpd(a, b, SAE);
        transmute(r)
9814 }
9815}
9816
9817/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
9818/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9819///
9820/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_round_pd&expand=3657)
9821#[inline]
9822#[target_feature(enable = "avx512f")]
9823#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9824#[cfg_attr(test, assert_instr(vmaxpd, SAE = 8))]
9825#[rustc_legacy_const_generics(4)]
9826pub fn _mm512_mask_max_round_pd<const SAE: i32>(
9827 src: __m512d,
9828 k: __mmask8,
9829 a: __m512d,
9830 b: __m512d,
9831) -> __m512d {
9832 unsafe {
9833 static_assert_sae!(SAE);
9834 let a: f64x8 = a.as_f64x8();
9835 let b: f64x8 = b.as_f64x8();
9836 let r: f64x8 = vmaxpd(a, b, SAE);
        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
9838 }
9839}
9840
9841/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9842/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9843///
9844/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_round_pd&expand=3658)
9845#[inline]
9846#[target_feature(enable = "avx512f")]
9847#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9848#[cfg_attr(test, assert_instr(vmaxpd, SAE = 8))]
9849#[rustc_legacy_const_generics(3)]
9850pub fn _mm512_maskz_max_round_pd<const SAE: i32>(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
9851 unsafe {
9852 static_assert_sae!(SAE);
9853 let a: f64x8 = a.as_f64x8();
9854 let b: f64x8 = b.as_f64x8();
9855 let r: f64x8 = vmaxpd(a, b, SAE);
        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
9857 }
9858}
9859
9860/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst.\
9861/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9862///
9863/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_round_ps&expand=3776)
9864#[inline]
9865#[target_feature(enable = "avx512f")]
9866#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9867#[cfg_attr(test, assert_instr(vminps, SAE = 8))]
9868#[rustc_legacy_const_generics(2)]
9869pub fn _mm512_min_round_ps<const SAE: i32>(a: __m512, b: __m512) -> __m512 {
9870 unsafe {
9871 static_assert_sae!(SAE);
9872 let a: f32x16 = a.as_f32x16();
9873 let b: f32x16 = b.as_f32x16();
9874 let r: f32x16 = vminps(a, b, SAE);
        transmute(r)
9876 }
9877}
9878
9879/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
9880/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9881///
9882/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_round_ps&expand=3774)
9883#[inline]
9884#[target_feature(enable = "avx512f")]
9885#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9886#[cfg_attr(test, assert_instr(vminps, SAE = 8))]
9887#[rustc_legacy_const_generics(4)]
9888pub fn _mm512_mask_min_round_ps<const SAE: i32>(
9889 src: __m512,
9890 k: __mmask16,
9891 a: __m512,
9892 b: __m512,
9893) -> __m512 {
9894 unsafe {
9895 static_assert_sae!(SAE);
9896 let a: f32x16 = a.as_f32x16();
9897 let b: f32x16 = b.as_f32x16();
9898 let r: f32x16 = vminps(a, b, SAE);
        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
9900 }
9901}
9902
9903/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9904/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9905///
9906/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_round_ps&expand=3775)
9907#[inline]
9908#[target_feature(enable = "avx512f")]
9909#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9910#[cfg_attr(test, assert_instr(vminps, SAE = 8))]
9911#[rustc_legacy_const_generics(3)]
9912pub fn _mm512_maskz_min_round_ps<const SAE: i32>(k: __mmask16, a: __m512, b: __m512) -> __m512 {
9913 unsafe {
9914 static_assert_sae!(SAE);
9915 let a: f32x16 = a.as_f32x16();
9916 let b: f32x16 = b.as_f32x16();
9917 let r: f32x16 = vminps(a, b, SAE);
        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
9919 }
9920}
9921
9922/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst.\
9923/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9924///
9925/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_round_pd&expand=3773)
9926#[inline]
9927#[target_feature(enable = "avx512f")]
9928#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9929#[cfg_attr(test, assert_instr(vminpd, SAE = 8))]
9930#[rustc_legacy_const_generics(2)]
9931pub fn _mm512_min_round_pd<const SAE: i32>(a: __m512d, b: __m512d) -> __m512d {
9932 unsafe {
9933 static_assert_sae!(SAE);
9934 let a: f64x8 = a.as_f64x8();
9935 let b: f64x8 = b.as_f64x8();
9936 let r: f64x8 = vminpd(a, b, SAE);
        transmute(r)
9938 }
9939}
9940
9941/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
9942/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9943///
9944/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_round_pd&expand=3771)
9945#[inline]
9946#[target_feature(enable = "avx512f")]
9947#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9948#[cfg_attr(test, assert_instr(vminpd, SAE = 8))]
9949#[rustc_legacy_const_generics(4)]
9950pub fn _mm512_mask_min_round_pd<const SAE: i32>(
9951 src: __m512d,
9952 k: __mmask8,
9953 a: __m512d,
9954 b: __m512d,
9955) -> __m512d {
9956 unsafe {
9957 static_assert_sae!(SAE);
9958 let a: f64x8 = a.as_f64x8();
9959 let b: f64x8 = b.as_f64x8();
9960 let r: f64x8 = vminpd(a, b, SAE);
        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
9962 }
9963}
9964
9965/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9966/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9967///
9968/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_round_pd&expand=3772)
9969#[inline]
9970#[target_feature(enable = "avx512f")]
9971#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9972#[cfg_attr(test, assert_instr(vminpd, SAE = 8))]
9973#[rustc_legacy_const_generics(3)]
9974pub fn _mm512_maskz_min_round_pd<const SAE: i32>(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
9975 unsafe {
9976 static_assert_sae!(SAE);
9977 let a: f64x8 = a.as_f64x8();
9978 let b: f64x8 = b.as_f64x8();
9979 let r: f64x8 = vminpd(a, b, SAE);
        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
9981 }
9982}
9983
9984/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.\
9985/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9986///
9987/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_round_ps&expand=2850)
9988#[inline]
9989#[target_feature(enable = "avx512f")]
9990#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9991#[cfg_attr(test, assert_instr(vgetexpps, SAE = 8))]
9992#[rustc_legacy_const_generics(1)]
9993pub fn _mm512_getexp_round_ps<const SAE: i32>(a: __m512) -> __m512 {
9994 unsafe {
9995 static_assert_sae!(SAE);
9996 let a: f32x16 = a.as_f32x16();
        let r: f32x16 = vgetexpps(a, f32x16::ZERO, 0b11111111_11111111, SAE);
        transmute(r)
9999 }
10000}
10001
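// Usage sketch (illustrative comment; assumes an `avx512f`-enabled caller):
// getexp extracts the unbiased exponent as a float, i.e. floor(log2(|x|)) per lane.
//
//     let a = _mm512_set1_ps(8.0);
//     let e = _mm512_getexp_round_ps::<_MM_FROUND_NO_EXC>(a); // all lanes 3.0
//
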
10002/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
10003/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10004///
10005/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_round_ps&expand=2851)
10006#[inline]
10007#[target_feature(enable = "avx512f")]
10008#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10009#[cfg_attr(test, assert_instr(vgetexpps, SAE = 8))]
10010#[rustc_legacy_const_generics(3)]
10011pub fn _mm512_mask_getexp_round_ps<const SAE: i32>(src: __m512, k: __mmask16, a: __m512) -> __m512 {
10012 unsafe {
10013 static_assert_sae!(SAE);
10014 let a: f32x16 = a.as_f32x16();
10015 let src: f32x16 = src.as_f32x16();
        let r: f32x16 = vgetexpps(a, src, k, SAE);
        transmute(r)
10018 }
10019}
10020
10021/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
10022/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10023///
10024/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_round_ps&expand=2852)
10025#[inline]
10026#[target_feature(enable = "avx512f")]
10027#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10028#[cfg_attr(test, assert_instr(vgetexpps, SAE = 8))]
10029#[rustc_legacy_const_generics(2)]
10030pub fn _mm512_maskz_getexp_round_ps<const SAE: i32>(k: __mmask16, a: __m512) -> __m512 {
10031 unsafe {
10032 static_assert_sae!(SAE);
10033 let a: f32x16 = a.as_f32x16();
        let r: f32x16 = vgetexpps(a, f32x16::ZERO, k, SAE);
        transmute(r)
10036 }
10037}
10038
10039/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.\
10040/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10041///
10042/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_round_pd&expand=2847)
10043#[inline]
10044#[target_feature(enable = "avx512f")]
10045#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10046#[cfg_attr(test, assert_instr(vgetexppd, SAE = 8))]
10047#[rustc_legacy_const_generics(1)]
10048pub fn _mm512_getexp_round_pd<const SAE: i32>(a: __m512d) -> __m512d {
10049 unsafe {
10050 static_assert_sae!(SAE);
10051 let a: f64x8 = a.as_f64x8();
        let r: f64x8 = vgetexppd(a, f64x8::ZERO, 0b11111111, SAE);
        transmute(r)
10054 }
10055}
10056
10057/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
10058/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10059///
10060/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_round_pd&expand=2848)
10061#[inline]
10062#[target_feature(enable = "avx512f")]
10063#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10064#[cfg_attr(test, assert_instr(vgetexppd, SAE = 8))]
10065#[rustc_legacy_const_generics(3)]
10066pub fn _mm512_mask_getexp_round_pd<const SAE: i32>(
10067 src: __m512d,
10068 k: __mmask8,
10069 a: __m512d,
10070) -> __m512d {
10071 unsafe {
10072 static_assert_sae!(SAE);
10073 let a: f64x8 = a.as_f64x8();
10074 let src: f64x8 = src.as_f64x8();
        let r: f64x8 = vgetexppd(a, src, k, SAE);
        transmute(r)
10077 }
10078}
10079
10080/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
10081/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10082///
10083/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_round_pd&expand=2849)
10084#[inline]
10085#[target_feature(enable = "avx512f")]
10086#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10087#[cfg_attr(test, assert_instr(vgetexppd, SAE = 8))]
10088#[rustc_legacy_const_generics(2)]
10089pub fn _mm512_maskz_getexp_round_pd<const SAE: i32>(k: __mmask8, a: __m512d) -> __m512d {
10090 unsafe {
10091 static_assert_sae!(SAE);
10092 let a: f64x8 = a.as_f64x8();
        let r: f64x8 = vgetexppd(a, f64x8::ZERO, k, SAE);
        transmute(r)
10095 }
10096}
10097
10098/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
10099/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10100/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10101/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10102/// * [`_MM_FROUND_TO_POS_INF`] : round up
10103/// * [`_MM_FROUND_TO_ZERO`] : truncate
10104/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10105///
10106/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10107/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_round_ps&expand=4790)
10108#[inline]
10109#[target_feature(enable = "avx512f")]
10110#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10111#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0, SAE = 8))]
10112#[rustc_legacy_const_generics(1, 2)]
10113pub fn _mm512_roundscale_round_ps<const IMM8: i32, const SAE: i32>(a: __m512) -> __m512 {
10114 unsafe {
10115 static_assert_uimm_bits!(IMM8, 8);
10116 static_assert_mantissas_sae!(SAE);
10117 let a: f32x16 = a.as_f32x16();
        let r: f32x16 = vrndscaleps(a, IMM8, f32x16::ZERO, 0b11111111_11111111, SAE);
        transmute(r)
10120 }
10121}
10122
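// Usage sketch (illustrative comment; assumes an `avx512f`-enabled caller):
// with IMM8 = 0 (keep zero fraction bits, round-to-nearest) each lane is
// rounded to the nearest integer; SAE = _MM_FROUND_NO_EXC suppresses exceptions.
//
//     let a = _mm512_set1_ps(1.25);
//     let r = _mm512_roundscale_round_ps::<0, _MM_FROUND_NO_EXC>(a); // all lanes 1.0
//
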
10123/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
10124/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10125/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10126/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10127/// * [`_MM_FROUND_TO_POS_INF`] : round up
10128/// * [`_MM_FROUND_TO_ZERO`] : truncate
10129/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10130///
10131/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_round_ps&expand=4788)
10133#[inline]
10134#[target_feature(enable = "avx512f")]
10135#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10136#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0, SAE = 8))]
10137#[rustc_legacy_const_generics(3, 4)]
10138pub fn _mm512_mask_roundscale_round_ps<const IMM8: i32, const SAE: i32>(
10139 src: __m512,
10140 k: __mmask16,
10141 a: __m512,
10142) -> __m512 {
10143 unsafe {
10144 static_assert_uimm_bits!(IMM8, 8);
10145 static_assert_mantissas_sae!(SAE);
10146 let a: f32x16 = a.as_f32x16();
10147 let src: f32x16 = src.as_f32x16();
        let r: f32x16 = vrndscaleps(a, IMM8, src, k, SAE);
        transmute(r)
10150 }
10151}
10152
10153/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
10154/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10155/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10156/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10157/// * [`_MM_FROUND_TO_POS_INF`] : round up
10158/// * [`_MM_FROUND_TO_ZERO`] : truncate
10159/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10160///
10161/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10162/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_round_ps&expand=4789)
10163#[inline]
10164#[target_feature(enable = "avx512f")]
10165#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10166#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0, SAE = 8))]
10167#[rustc_legacy_const_generics(2, 3)]
10168pub fn _mm512_maskz_roundscale_round_ps<const IMM8: i32, const SAE: i32>(
10169 k: __mmask16,
10170 a: __m512,
10171) -> __m512 {
10172 unsafe {
10173 static_assert_uimm_bits!(IMM8, 8);
10174 static_assert_mantissas_sae!(SAE);
10175 let a: f32x16 = a.as_f32x16();
        let r: f32x16 = vrndscaleps(a, IMM8, f32x16::ZERO, k, SAE);
        transmute(r)
10178 }
10179}
10180
10181/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
10182/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10183/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10184/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10185/// * [`_MM_FROUND_TO_POS_INF`] : round up
10186/// * [`_MM_FROUND_TO_ZERO`] : truncate
10187/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10188///
10189/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10190/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_round_pd&expand=4787)
10191#[inline]
10192#[target_feature(enable = "avx512f")]
10193#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10194#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0, SAE = 8))]
10195#[rustc_legacy_const_generics(1, 2)]
10196pub fn _mm512_roundscale_round_pd<const IMM8: i32, const SAE: i32>(a: __m512d) -> __m512d {
10197 unsafe {
10198 static_assert_uimm_bits!(IMM8, 8);
10199 static_assert_mantissas_sae!(SAE);
10200 let a: f64x8 = a.as_f64x8();
        let r: f64x8 = vrndscalepd(a, IMM8, f64x8::ZERO, 0b11111111, SAE);
        transmute(r)
10203 }
10204}
10205
10206/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
10207/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10208/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10209/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10210/// * [`_MM_FROUND_TO_POS_INF`] : round up
10211/// * [`_MM_FROUND_TO_ZERO`] : truncate
10212/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10213///
10214/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10215/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_round_pd&expand=4785)
10216#[inline]
10217#[target_feature(enable = "avx512f")]
10218#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10219#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0, SAE = 8))]
10220#[rustc_legacy_const_generics(3, 4)]
10221pub fn _mm512_mask_roundscale_round_pd<const IMM8: i32, const SAE: i32>(
10222 src: __m512d,
10223 k: __mmask8,
10224 a: __m512d,
10225) -> __m512d {
10226 unsafe {
10227 static_assert_uimm_bits!(IMM8, 8);
10228 static_assert_mantissas_sae!(SAE);
10229 let a: f64x8 = a.as_f64x8();
10230 let src: f64x8 = src.as_f64x8();
        let r: f64x8 = vrndscalepd(a, IMM8, src, k, SAE);
        transmute(r)
10233 }
10234}
10235
10236/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
10237/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10238/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10239/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10240/// * [`_MM_FROUND_TO_POS_INF`] : round up
10241/// * [`_MM_FROUND_TO_ZERO`] : truncate
10242/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10243///
10244/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10245/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_round_pd&expand=4786)
10246#[inline]
10247#[target_feature(enable = "avx512f")]
10248#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10249#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0, SAE = 8))]
10250#[rustc_legacy_const_generics(2, 3)]
10251pub fn _mm512_maskz_roundscale_round_pd<const IMM8: i32, const SAE: i32>(
10252 k: __mmask8,
10253 a: __m512d,
10254) -> __m512d {
10255 unsafe {
10256 static_assert_uimm_bits!(IMM8, 8);
10257 static_assert_mantissas_sae!(SAE);
10258 let a: f64x8 = a.as_f64x8();
        let r: f64x8 = vrndscalepd(a, IMM8, f64x8::ZERO, k, SAE);
        transmute(r)
10261 }
10262}
10263
10264/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.\
10265///
10266/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10267/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10268/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10269/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10270/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10271/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10272///
10273/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_round_ps&expand=4889)
10274#[inline]
10275#[target_feature(enable = "avx512f")]
10276#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10277#[cfg_attr(test, assert_instr(vscalefps, ROUNDING = 8))]
10278#[rustc_legacy_const_generics(2)]
10279pub fn _mm512_scalef_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
10280 unsafe {
10281 static_assert_rounding!(ROUNDING);
10282 let a: f32x16 = a.as_f32x16();
10283 let b: f32x16 = b.as_f32x16();
        let r: f32x16 = vscalefps(a, b, f32x16::ZERO, 0b11111111_11111111, ROUNDING);
        transmute(r)
10286 }
10287}
10288
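// Usage sketch (illustrative comment; assumes an `avx512f`-enabled caller):
// scalef computes a * 2^floor(b) per lane, so 3.0 scaled by 2.0 gives 12.0.
//
//     let a = _mm512_set1_ps(3.0);
//     let b = _mm512_set1_ps(2.0);
//     let r = _mm512_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
//
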
10289/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
10290///
10291/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10292/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10293/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10294/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10295/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10296/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10297///
10298/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_round_ps&expand=4887)
10299#[inline]
10300#[target_feature(enable = "avx512f")]
10301#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10302#[cfg_attr(test, assert_instr(vscalefps, ROUNDING = 8))]
10303#[rustc_legacy_const_generics(4)]
10304pub fn _mm512_mask_scalef_round_ps<const ROUNDING: i32>(
10305 src: __m512,
10306 k: __mmask16,
10307 a: __m512,
10308 b: __m512,
10309) -> __m512 {
10310 unsafe {
10311 static_assert_rounding!(ROUNDING);
10312 let a: f32x16 = a.as_f32x16();
10313 let b: f32x16 = b.as_f32x16();
10314 let src: f32x16 = src.as_f32x16();
        let r: f32x16 = vscalefps(a, b, src, k, ROUNDING);
        transmute(r)
10317 }
10318}
10319
10320/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
10321///
10322/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10323/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10324/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10325/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10326/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10327/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10328///
10329/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_round_ps&expand=4888)
10330#[inline]
10331#[target_feature(enable = "avx512f")]
10332#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10333#[cfg_attr(test, assert_instr(vscalefps, ROUNDING = 8))]
10334#[rustc_legacy_const_generics(3)]
10335pub fn _mm512_maskz_scalef_round_ps<const ROUNDING: i32>(
10336 k: __mmask16,
10337 a: __m512,
10338 b: __m512,
10339) -> __m512 {
10340 unsafe {
10341 static_assert_rounding!(ROUNDING);
10342 let a: f32x16 = a.as_f32x16();
10343 let b: f32x16 = b.as_f32x16();
        let r: f32x16 = vscalefps(a, b, f32x16::ZERO, k, ROUNDING);
        transmute(r)
10346 }
10347}
10348
10349/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.\
10350///
10351/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10352/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10353/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10354/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10355/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10356/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10357///
10358/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_round_pd&expand=4886)
10359#[inline]
10360#[target_feature(enable = "avx512f")]
10361#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10362#[cfg_attr(test, assert_instr(vscalefpd, ROUNDING = 8))]
10363#[rustc_legacy_const_generics(2)]
10364pub fn _mm512_scalef_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
10365 unsafe {
10366 static_assert_rounding!(ROUNDING);
10367 let a: f64x8 = a.as_f64x8();
10368 let b: f64x8 = b.as_f64x8();
        let r: f64x8 = vscalefpd(a, b, f64x8::ZERO, 0b11111111, ROUNDING);
        transmute(r)
10371 }
10372}
10373
10374/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
10375///
10376/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10377/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10378/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10379/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10380/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10381/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10382///
10383/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_round_pd&expand=4884)
10384#[inline]
10385#[target_feature(enable = "avx512f")]
10386#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10387#[cfg_attr(test, assert_instr(vscalefpd, ROUNDING = 8))]
10388#[rustc_legacy_const_generics(4)]
10389pub fn _mm512_mask_scalef_round_pd<const ROUNDING: i32>(
10390 src: __m512d,
10391 k: __mmask8,
10392 a: __m512d,
10393 b: __m512d,
10394) -> __m512d {
10395 unsafe {
10396 static_assert_rounding!(ROUNDING);
10397 let a: f64x8 = a.as_f64x8();
10398 let b: f64x8 = b.as_f64x8();
10399 let src: f64x8 = src.as_f64x8();
        let r: f64x8 = vscalefpd(a, b, src, k, ROUNDING);
        transmute(r)
10402 }
10403}
10404
10405/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
10406///
10407/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10408/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10409/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10410/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10411/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10412/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10413///
10414/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_round_pd&expand=4885)
10415#[inline]
10416#[target_feature(enable = "avx512f")]
10417#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10418#[cfg_attr(test, assert_instr(vscalefpd, ROUNDING = 8))]
10419#[rustc_legacy_const_generics(3)]
10420pub fn _mm512_maskz_scalef_round_pd<const ROUNDING: i32>(
10421 k: __mmask8,
10422 a: __m512d,
10423 b: __m512d,
10424) -> __m512d {
10425 unsafe {
10426 static_assert_rounding!(ROUNDING);
10427 let a: f64x8 = a.as_f64x8();
10428 let b: f64x8 = b.as_f64x8();
        let r: f64x8 = vscalefpd(a, b, f64x8::ZERO, k, ROUNDING);
        transmute(r)
10431 }
10432}
10433
10434/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.\
10435///
10436/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10437/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_round_ps&expand=2505)
10438#[inline]
10439#[target_feature(enable = "avx512f")]
10440#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10441#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0, SAE = 8))]
10442#[rustc_legacy_const_generics(3, 4)]
10443pub fn _mm512_fixupimm_round_ps<const IMM8: i32, const SAE: i32>(
10444 a: __m512,
10445 b: __m512,
10446 c: __m512i,
10447) -> __m512 {
10448 unsafe {
10449 static_assert_uimm_bits!(IMM8, 8);
10450 static_assert_mantissas_sae!(SAE);
10451 let a: f32x16 = a.as_f32x16();
10452 let b: f32x16 = b.as_f32x16();
10453 let c: i32x16 = c.as_i32x16();
        let r: f32x16 = vfixupimmps(a, b, c, IMM8, 0b11111111_11111111, SAE);
        transmute(r)
10456 }
10457}
10458
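// Usage sketch (illustrative comment; assumes an `avx512f`-enabled caller):
// each 32-bit lane of `c` holds a small lookup table describing how special
// values (NaN, zeros, infinities, ...) detected in the inputs are replaced,
// and IMM8 selects which cases report exceptions. The call shape is:
//
//     let r = _mm512_fixupimm_round_ps::<0, _MM_FROUND_NO_EXC>(a, b, c);
//
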
10459/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.\
10460///
10461/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10462/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_round_ps&expand=2506)
10463#[inline]
10464#[target_feature(enable = "avx512f")]
10465#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10466#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0, SAE = 8))]
10467#[rustc_legacy_const_generics(4, 5)]
10468pub fn _mm512_mask_fixupimm_round_ps<const IMM8: i32, const SAE: i32>(
10469 a: __m512,
10470 k: __mmask16,
10471 b: __m512,
10472 c: __m512i,
10473) -> __m512 {
10474 unsafe {
10475 static_assert_uimm_bits!(IMM8, 8);
10476 static_assert_mantissas_sae!(SAE);
10477 let a: f32x16 = a.as_f32x16();
10478 let b: f32x16 = b.as_f32x16();
10479 let c: i32x16 = c.as_i32x16();
        let r: f32x16 = vfixupimmps(a, b, c, IMM8, k, SAE);
        transmute(r)
10482 }
10483}
10484
10485/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.\
10486///
10487/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10488/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_round_ps&expand=2507)
10489#[inline]
10490#[target_feature(enable = "avx512f")]
10491#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10492#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0, SAE = 8))]
10493#[rustc_legacy_const_generics(4, 5)]
10494pub fn _mm512_maskz_fixupimm_round_ps<const IMM8: i32, const SAE: i32>(
10495 k: __mmask16,
10496 a: __m512,
10497 b: __m512,
10498 c: __m512i,
10499) -> __m512 {
10500 unsafe {
10501 static_assert_uimm_bits!(IMM8, 8);
10502 static_assert_mantissas_sae!(SAE);
10503 let a: f32x16 = a.as_f32x16();
10504 let b: f32x16 = b.as_f32x16();
10505 let c: i32x16 = c.as_i32x16();
        let r: f32x16 = vfixupimmpsz(a, b, c, IMM8, k, SAE);
        transmute(r)
10508 }
10509}
10510
10511/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.\
10512///
10513/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10514/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_round_pd&expand=2502)
10515#[inline]
10516#[target_feature(enable = "avx512f")]
10517#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10518#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0, SAE = 8))]
10519#[rustc_legacy_const_generics(3, 4)]
10520pub fn _mm512_fixupimm_round_pd<const IMM8: i32, const SAE: i32>(
10521 a: __m512d,
10522 b: __m512d,
10523 c: __m512i,
10524) -> __m512d {
10525 unsafe {
10526 static_assert_uimm_bits!(IMM8, 8);
10527 static_assert_mantissas_sae!(SAE);
10528 let a: f64x8 = a.as_f64x8();
10529 let b: f64x8 = b.as_f64x8();
10530 let c: i64x8 = c.as_i64x8();
        let r: f64x8 = vfixupimmpd(a, b, c, IMM8, 0b11111111, SAE);
        transmute(r)
10533 }
10534}
10535
10536/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.\
10537///
10538/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10539/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_round_pd&expand=2503)
10540#[inline]
10541#[target_feature(enable = "avx512f")]
10542#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10543#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0, SAE = 8))]
10544#[rustc_legacy_const_generics(4, 5)]
10545pub fn _mm512_mask_fixupimm_round_pd<const IMM8: i32, const SAE: i32>(
10546 a: __m512d,
10547 k: __mmask8,
10548 b: __m512d,
10549 c: __m512i,
10550) -> __m512d {
10551 unsafe {
10552 static_assert_uimm_bits!(IMM8, 8);
10553 static_assert_mantissas_sae!(SAE);
10554 let a: f64x8 = a.as_f64x8();
10555 let b: f64x8 = b.as_f64x8();
10556 let c: i64x8 = c.as_i64x8();
        let r: f64x8 = vfixupimmpd(a, b, c, IMM8, k, SAE);
        transmute(r)
10559 }
10560}
10561
10562/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.\
10563///
10564/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10565/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_round_pd&expand=2504)
10566#[inline]
10567#[target_feature(enable = "avx512f")]
10568#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10569#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0, SAE = 8))]
10570#[rustc_legacy_const_generics(4, 5)]
10571pub fn _mm512_maskz_fixupimm_round_pd<const IMM8: i32, const SAE: i32>(
10572 k: __mmask8,
10573 a: __m512d,
10574 b: __m512d,
10575 c: __m512i,
10576) -> __m512d {
10577 unsafe {
10578 static_assert_uimm_bits!(IMM8, 8);
10579 static_assert_mantissas_sae!(SAE);
10580 let a: f64x8 = a.as_f64x8();
10581 let b: f64x8 = b.as_f64x8();
10582 let c: i64x8 = c.as_i64x8();
        let r: f64x8 = vfixupimmpdz(a, b, c, IMM8, k, SAE);
        transmute(r)
10585 }
10586}
10587
10588/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
10589/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
10590/// _MM_MANT_NORM_1_2 // interval [1, 2)\
10591/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
10592/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
10593/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
10594/// The sign is determined by sc which can take the following values:\
10595/// _MM_MANT_SIGN_src // sign = sign(src)\
10596/// _MM_MANT_SIGN_zero // sign = 0\
10597/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
10598/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10599///
10600/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_round_ps&expand=2886)
10601#[inline]
10602#[target_feature(enable = "avx512f")]
10603#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10604#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0, SAE = 4))]
10605#[rustc_legacy_const_generics(1, 2, 3)]
10606pub fn _mm512_getmant_round_ps<
10607 const NORM: _MM_MANTISSA_NORM_ENUM,
10608 const SIGN: _MM_MANTISSA_SIGN_ENUM,
10609 const SAE: i32,
10610>(
10611 a: __m512,
10612) -> __m512 {
10613 unsafe {
10614 static_assert_uimm_bits!(NORM, 4);
10615 static_assert_uimm_bits!(SIGN, 2);
10616 static_assert_mantissas_sae!(SAE);
10617 let a: f32x16 = a.as_f32x16();
        let r: f32x16 = vgetmantps(a, SIGN << 2 | NORM, f32x16::ZERO, 0b11111111_11111111, SAE);
        transmute(r)
10620 }
10621}
10622
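// Usage sketch (illustrative comment; assumes an `avx512f`-enabled caller and
// the mantissa constants named in the doc above): normalizing to [1, 2) with
// the source sign, 12.0 = 1.5 * 2^3 yields 1.5 in every lane.
//
//     let a = _mm512_set1_ps(12.0);
//     let m = _mm512_getmant_round_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_src, _MM_FROUND_CUR_DIRECTION>(a);
//
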
10623/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
10624/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
10625/// _MM_MANT_NORM_1_2 // interval [1, 2)\
10626/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
10627/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
10628/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
10629/// The sign is determined by sc which can take the following values:\
10630/// _MM_MANT_SIGN_src // sign = sign(src)\
10631/// _MM_MANT_SIGN_zero // sign = 0\
10632/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
10633/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10634///
10635/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_round_ps&expand=2887)
10636#[inline]
10637#[target_feature(enable = "avx512f")]
10638#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10639#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0, SAE = 4))]
10640#[rustc_legacy_const_generics(3, 4, 5)]
10641pub fn _mm512_mask_getmant_round_ps<
10642 const NORM: _MM_MANTISSA_NORM_ENUM,
10643 const SIGN: _MM_MANTISSA_SIGN_ENUM,
10644 const SAE: i32,
10645>(
10646 src: __m512,
10647 k: __mmask16,
10648 a: __m512,
10649) -> __m512 {
10650 unsafe {
10651 static_assert_uimm_bits!(NORM, 4);
10652 static_assert_uimm_bits!(SIGN, 2);
10653 static_assert_mantissas_sae!(SAE);
10654 let a: f32x16 = a.as_f32x16();
10655 let src: f32x16 = src.as_f32x16();
        let r: f32x16 = vgetmantps(a, SIGN << 2 | NORM, src, k, SAE);
        transmute(r)
10658 }
10659}
10660
10661/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
10662/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
10663/// _MM_MANT_NORM_1_2 // interval [1, 2)\
10664/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
10665/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
10666/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
10667/// The sign is determined by sc which can take the following values:\
10668/// _MM_MANT_SIGN_src // sign = sign(src)\
10669/// _MM_MANT_SIGN_zero // sign = 0\
10670/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
10671/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10672///
10673/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_round_ps&expand=2888)
10674#[inline]
10675#[target_feature(enable = "avx512f")]
10676#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10677#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0, SAE = 4))]
10678#[rustc_legacy_const_generics(2, 3, 4)]
10679pub fn _mm512_maskz_getmant_round_ps<
10680 const NORM: _MM_MANTISSA_NORM_ENUM,
10681 const SIGN: _MM_MANTISSA_SIGN_ENUM,
10682 const SAE: i32,
10683>(
10684 k: __mmask16,
10685 a: __m512,
10686) -> __m512 {
10687 unsafe {
10688 static_assert_uimm_bits!(NORM, 4);
10689 static_assert_uimm_bits!(SIGN, 2);
10690 static_assert_mantissas_sae!(SAE);
10691 let a: f32x16 = a.as_f32x16();
        let r: f32x16 = vgetmantps(a, SIGN << 2 | NORM, f32x16::ZERO, k, SAE);
        transmute(r)
10694 }
10695}
10696
10697/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
10698/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
10699/// _MM_MANT_NORM_1_2 // interval [1, 2)\
10700/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
10701/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
10702/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
10703/// The sign is determined by sc which can take the following values:\
10704/// _MM_MANT_SIGN_src // sign = sign(src)\
10705/// _MM_MANT_SIGN_zero // sign = 0\
10706/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
10707/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10708///
10709/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_round_pd&expand=2883)
10710#[inline]
10711#[target_feature(enable = "avx512f")]
10712#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10713#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0, SAE = 4))]
10714#[rustc_legacy_const_generics(1, 2, 3)]
10715pub fn _mm512_getmant_round_pd<
10716 const NORM: _MM_MANTISSA_NORM_ENUM,
10717 const SIGN: _MM_MANTISSA_SIGN_ENUM,
10718 const SAE: i32,
10719>(
10720 a: __m512d,
10721) -> __m512d {
10722 unsafe {
10723 static_assert_uimm_bits!(NORM, 4);
10724 static_assert_uimm_bits!(SIGN, 2);
10725 static_assert_mantissas_sae!(SAE);
10726 let a: f64x8 = a.as_f64x8();
        let r: f64x8 = vgetmantpd(a, SIGN << 2 | NORM, f64x8::ZERO, 0b11111111, SAE);
        transmute(r)
10729 }
10730}
10731
10732/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
10733/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
10734/// _MM_MANT_NORM_1_2 // interval [1, 2)\
10735/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
10736/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
10737/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
10738/// The sign is determined by sc which can take the following values:\
10739/// _MM_MANT_SIGN_src // sign = sign(src)\
10740/// _MM_MANT_SIGN_zero // sign = 0\
10741/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
10742/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10743///
10744/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_round_pd&expand=2884)
10745#[inline]
10746#[target_feature(enable = "avx512f")]
10747#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10748#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0, SAE = 4))]
10749#[rustc_legacy_const_generics(3, 4, 5)]
10750pub fn _mm512_mask_getmant_round_pd<
10751 const NORM: _MM_MANTISSA_NORM_ENUM,
10752 const SIGN: _MM_MANTISSA_SIGN_ENUM,
10753 const SAE: i32,
10754>(
10755 src: __m512d,
10756 k: __mmask8,
10757 a: __m512d,
10758) -> __m512d {
10759 unsafe {
10760 static_assert_uimm_bits!(NORM, 4);
10761 static_assert_uimm_bits!(SIGN, 2);
10762 static_assert_mantissas_sae!(SAE);
10763 let a: f64x8 = a.as_f64x8();
10764 let src: f64x8 = src.as_f64x8();
        let r: f64x8 = vgetmantpd(a, SIGN << 2 | NORM, src, k, SAE);
        transmute(r)
10767 }
10768}
10769
10770/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
10771/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
10772/// _MM_MANT_NORM_1_2 // interval [1, 2)\
10773/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
10774/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
10775/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
10776/// The sign is determined by sc which can take the following values:\
10777/// _MM_MANT_SIGN_src // sign = sign(src)\
10778/// _MM_MANT_SIGN_zero // sign = 0\
10779/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
10780/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10781///
10782/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_round_pd&expand=2885)
10783#[inline]
10784#[target_feature(enable = "avx512f")]
10785#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10786#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0, SAE = 4))]
10787#[rustc_legacy_const_generics(2, 3, 4)]
10788pub fn _mm512_maskz_getmant_round_pd<
10789 const NORM: _MM_MANTISSA_NORM_ENUM,
10790 const SIGN: _MM_MANTISSA_SIGN_ENUM,
10791 const SAE: i32,
10792>(
10793 k: __mmask8,
10794 a: __m512d,
10795) -> __m512d {
10796 unsafe {
10797 static_assert_uimm_bits!(NORM, 4);
10798 static_assert_uimm_bits!(SIGN, 2);
10799 static_assert_mantissas_sae!(SAE);
10800 let a: f64x8 = a.as_f64x8();
        let r: f64x8 = vgetmantpd(a, SIGN << 2 | NORM, f64x8::ZERO, k, SAE);
        transmute(r)
10803 }
10804}
10805
10806/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.
10807///
10808/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_epi32&expand=1737)
10809#[inline]
10810#[target_feature(enable = "avx512f")]
10811#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10812#[cfg_attr(test, assert_instr(vcvtps2dq))]
10813pub fn _mm512_cvtps_epi32(a: __m512) -> __m512i {
10814 unsafe {
        transmute(vcvtps2dq(
            a.as_f32x16(),
            i32x16::ZERO,
            0b11111111_11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
10821 }
10822}
10823
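// Usage sketch (illustrative comment; assumes an `avx512f`-enabled caller): the
// conversion follows the current MXCSR rounding mode, which defaults to
// round-to-nearest-even.
//
//     let a = _mm512_set1_ps(1.5);
//     let i = _mm512_cvtps_epi32(a); // all lanes 2 under the default rounding mode
//
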
10824/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10825///
10826/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_epi32&expand=1738)
10827#[inline]
10828#[target_feature(enable = "avx512f")]
10829#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10830#[cfg_attr(test, assert_instr(vcvtps2dq))]
10831pub fn _mm512_mask_cvtps_epi32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
10832 unsafe {
        transmute(vcvtps2dq(
            a.as_f32x16(),
            src.as_i32x16(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
10839 }
10840}
10841
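// Usage sketch (illustrative comment; assumes an `avx512f`-enabled caller): with
// a writemask only lanes whose mask bit is set receive converted values; the
// remaining lanes are copied from `src`.
//
//     let src = _mm512_set1_epi32(-1);
//     let a = _mm512_set1_ps(3.0);
//     // lanes 0..8 become 3, lanes 8..16 keep -1
//     let r = _mm512_mask_cvtps_epi32(src, 0b00000000_11111111, a);
//
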
10842/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10843///
10844/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_epi32&expand=1739)
10845#[inline]
10846#[target_feature(enable = "avx512f")]
10847#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10848#[cfg_attr(test, assert_instr(vcvtps2dq))]
10849pub fn _mm512_maskz_cvtps_epi32(k: __mmask16, a: __m512) -> __m512i {
10850 unsafe {
        transmute(vcvtps2dq(
            a.as_f32x16(),
            i32x16::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
10857 }
10858}
10859
10860/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10861///
10862/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_epi32&expand=1735)
10863#[inline]
10864#[target_feature(enable = "avx512f,avx512vl")]
10865#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10866#[cfg_attr(test, assert_instr(vcvtps2dq))]
10867pub fn _mm256_mask_cvtps_epi32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
10868 unsafe {
10869 let convert: __m256i = _mm256_cvtps_epi32(a);
        transmute(simd_select_bitmask(k, convert.as_i32x8(), src.as_i32x8()))
10871 }
10872}
10873
10874/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10875///
10876/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_epi32&expand=1736)
10877#[inline]
10878#[target_feature(enable = "avx512f,avx512vl")]
10879#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10880#[cfg_attr(test, assert_instr(vcvtps2dq))]
10881pub fn _mm256_maskz_cvtps_epi32(k: __mmask8, a: __m256) -> __m256i {
10882 unsafe {
10883 let convert: __m256i = _mm256_cvtps_epi32(a);
        transmute(simd_select_bitmask(k, convert.as_i32x8(), i32x8::ZERO))
10885 }
10886}
10887
10888/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10889///
10890/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_epi32&expand=1732)
10891#[inline]
10892#[target_feature(enable = "avx512f,avx512vl")]
10893#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10894#[cfg_attr(test, assert_instr(vcvtps2dq))]
10895pub fn _mm_mask_cvtps_epi32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
10896 unsafe {
10897 let convert: __m128i = _mm_cvtps_epi32(a);
        transmute(simd_select_bitmask(k, convert.as_i32x4(), src.as_i32x4()))
10899 }
10900}
10901
10902/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10903///
10904/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_epi32&expand=1733)
10905#[inline]
10906#[target_feature(enable = "avx512f,avx512vl")]
10907#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10908#[cfg_attr(test, assert_instr(vcvtps2dq))]
10909pub fn _mm_maskz_cvtps_epi32(k: __mmask8, a: __m128) -> __m128i {
10910 unsafe {
10911 let convert: __m128i = _mm_cvtps_epi32(a);
        transmute(simd_select_bitmask(k, convert.as_i32x4(), i32x4::ZERO))
10913 }
10914}
10915
10916/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
10917///
10918/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_epu32&expand=1755)
10919#[inline]
10920#[target_feature(enable = "avx512f")]
10921#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10922#[cfg_attr(test, assert_instr(vcvtps2udq))]
10923pub fn _mm512_cvtps_epu32(a: __m512) -> __m512i {
10924 unsafe {
        transmute(vcvtps2udq(
            a.as_f32x16(),
            u32x16::ZERO,
            0b11111111_11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
10931 }
10932}
10933
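// Usage sketch (illustrative comment; assumes an `avx512f`-enabled caller): the
// unsigned conversion also follows MXCSR rounding; negative or out-of-range
// inputs produce the unsigned integer indefinite value.
//
//     let a = _mm512_set1_ps(3.75);
//     let u = _mm512_cvtps_epu32(a); // all lanes 4 under the default rounding mode
//
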
10934/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10935///
10936/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_epu32&expand=1756)
10937#[inline]
10938#[target_feature(enable = "avx512f")]
10939#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10940#[cfg_attr(test, assert_instr(vcvtps2udq))]
10941pub fn _mm512_mask_cvtps_epu32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
10942 unsafe {
        transmute(vcvtps2udq(
            a.as_f32x16(),
            src.as_u32x16(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
10949 }
10950}
10951
10952/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10953///
10954/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_epu32&expand=1343)
10955#[inline]
10956#[target_feature(enable = "avx512f")]
10957#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10958#[cfg_attr(test, assert_instr(vcvtps2udq))]
10959pub fn _mm512_maskz_cvtps_epu32(k: __mmask16, a: __m512) -> __m512i {
10960 unsafe {
        transmute(vcvtps2udq(
            a.as_f32x16(),
            u32x16::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
10967 }
10968}
10969
10970/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
10971///
10972/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtps_epu32&expand=1752)
10973#[inline]
10974#[target_feature(enable = "avx512f,avx512vl")]
10975#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10976#[cfg_attr(test, assert_instr(vcvtps2udq))]
10977pub fn _mm256_cvtps_epu32(a: __m256) -> __m256i {
    unsafe { transmute(vcvtps2udq256(a.as_f32x8(), u32x8::ZERO, 0b11111111)) }
10979}
10980
10981/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10982///
10983/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_epu32&expand=1753)
10984#[inline]
10985#[target_feature(enable = "avx512f,avx512vl")]
10986#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10987#[cfg_attr(test, assert_instr(vcvtps2udq))]
10988pub fn _mm256_mask_cvtps_epu32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
    unsafe { transmute(vcvtps2udq256(a.as_f32x8(), src.as_u32x8(), k)) }
10990}
10991
10992/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10993///
10994/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_epu32&expand=1754)
10995#[inline]
10996#[target_feature(enable = "avx512f,avx512vl")]
10997#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10998#[cfg_attr(test, assert_instr(vcvtps2udq))]
10999pub fn _mm256_maskz_cvtps_epu32(k: __mmask8, a: __m256) -> __m256i {
    unsafe { transmute(vcvtps2udq256(a.as_f32x8(), u32x8::ZERO, k)) }
11001}
11002
11003/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
11004///
11005/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_epu32&expand=1749)
11006#[inline]
11007#[target_feature(enable = "avx512f,avx512vl")]
11008#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11009#[cfg_attr(test, assert_instr(vcvtps2udq))]
11010pub fn _mm_cvtps_epu32(a: __m128) -> __m128i {
    unsafe { transmute(vcvtps2udq128(a.as_f32x4(), u32x4::ZERO, 0b11111111)) }
11012}
11013
11014/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11015///
11016/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_epu32&expand=1750)
11017#[inline]
11018#[target_feature(enable = "avx512f,avx512vl")]
11019#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11020#[cfg_attr(test, assert_instr(vcvtps2udq))]
11021pub fn _mm_mask_cvtps_epu32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
11022 unsafe { transmute(src:vcvtps2udq128(a.as_f32x4(), src.as_u32x4(), mask:k)) }
11023}
11024
11025/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11026///
11027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_epu32&expand=1751)
11028#[inline]
11029#[target_feature(enable = "avx512f,avx512vl")]
11030#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11031#[cfg_attr(test, assert_instr(vcvtps2udq))]
11032pub fn _mm_maskz_cvtps_epu32(k: __mmask8, a: __m128) -> __m128i {
11033 unsafe { transmute(src:vcvtps2udq128(a.as_f32x4(), src:u32x4::ZERO, mask:k)) }
11034}
11035
11036/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
11037///
11038/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_pd&expand=1769)
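///
/// A minimal usage sketch (illustrative only; the helper name is hypothetical and the
/// caller is assumed to be gated on `avx512f`):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn widen_to_f64(v: __m256) -> __m512d {
///     // Eight f32 lanes widen to eight f64 lanes; the conversion is exact.
///     _mm512_cvtps_pd(v)
/// }
/// ```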
11039#[inline]
11040#[target_feature(enable = "avx512f")]
11041#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11042#[cfg_attr(test, assert_instr(vcvtps2pd))]
11043pub fn _mm512_cvtps_pd(a: __m256) -> __m512d {
11044 unsafe {
11045 transmute(src:vcvtps2pd(
11046 a.as_f32x8(),
11047 src:f64x8::ZERO,
11048 mask:0b11111111,
11049 _MM_FROUND_CUR_DIRECTION,
11050 ))
11051 }
11052}
11053
11054/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11055///
11056/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_pd&expand=1770)
11057#[inline]
11058#[target_feature(enable = "avx512f")]
11059#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11060#[cfg_attr(test, assert_instr(vcvtps2pd))]
11061pub fn _mm512_mask_cvtps_pd(src: __m512d, k: __mmask8, a: __m256) -> __m512d {
11062 unsafe {
11063 transmute(src:vcvtps2pd(
11064 a.as_f32x8(),
11065 src.as_f64x8(),
11066 mask:k,
11067 _MM_FROUND_CUR_DIRECTION,
11068 ))
11069 }
11070}
11071
11072/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11073///
11074/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_pd&expand=1771)
11075#[inline]
11076#[target_feature(enable = "avx512f")]
11077#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11078#[cfg_attr(test, assert_instr(vcvtps2pd))]
11079pub fn _mm512_maskz_cvtps_pd(k: __mmask8, a: __m256) -> __m512d {
11080 unsafe {
11081 transmute(src:vcvtps2pd(
11082 a.as_f32x8(),
11083 src:f64x8::ZERO,
11084 mask:k,
11085 _MM_FROUND_CUR_DIRECTION,
11086 ))
11087 }
11088}
11089
11090/// Performs element-by-element conversion of the lower half of packed single-precision (32-bit) floating-point elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst.
11091///
11092/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpslo_pd&expand=1784)
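///
/// A minimal usage sketch (illustrative only; the helper name is hypothetical):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn widen_low_half(v: __m512) -> __m512d {
///     // Only the lower eight f32 lanes of `v` are converted; the upper eight are ignored.
///     _mm512_cvtpslo_pd(v)
/// }
/// ```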
11093#[inline]
11094#[target_feature(enable = "avx512f")]
11095#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11096#[cfg_attr(test, assert_instr(vcvtps2pd))]
11097pub fn _mm512_cvtpslo_pd(v2: __m512) -> __m512d {
11098 unsafe {
11099 transmute(src:vcvtps2pd(
11100 a:_mm512_castps512_ps256(v2).as_f32x8(),
11101 src:f64x8::ZERO,
11102 mask:0b11111111,
11103 _MM_FROUND_CUR_DIRECTION,
11104 ))
11105 }
11106}
11107
11108/// Performs element-by-element conversion of the lower half of packed single-precision (32-bit) floating-point elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11109///
11110/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpslo_pd&expand=1785)
11111#[inline]
11112#[target_feature(enable = "avx512f")]
11113#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11114#[cfg_attr(test, assert_instr(vcvtps2pd))]
11115pub fn _mm512_mask_cvtpslo_pd(src: __m512d, k: __mmask8, v2: __m512) -> __m512d {
11116 unsafe {
11117 transmute(src:vcvtps2pd(
11118 a:_mm512_castps512_ps256(v2).as_f32x8(),
11119 src.as_f64x8(),
11120 mask:k,
11121 _MM_FROUND_CUR_DIRECTION,
11122 ))
11123 }
11124}
11125
11126/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
11127///
11128/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_ps&expand=1712)
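///
/// A minimal usage sketch (illustrative only; the helper name is hypothetical):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn narrow_to_f32(v: __m512d) -> __m256 {
///     // Eight f64 lanes are rounded to eight f32 lanes using the current rounding mode.
///     _mm512_cvtpd_ps(v)
/// }
/// ```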
11129#[inline]
11130#[target_feature(enable = "avx512f")]
11131#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11132#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11133pub fn _mm512_cvtpd_ps(a: __m512d) -> __m256 {
11134 unsafe {
11135 transmute(src:vcvtpd2ps(
11136 a.as_f64x8(),
11137 src:f32x8::ZERO,
11138 mask:0b11111111,
11139 _MM_FROUND_CUR_DIRECTION,
11140 ))
11141 }
11142}
11143
11144/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11145///
11146/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_ps&expand=1713)
11147#[inline]
11148#[target_feature(enable = "avx512f")]
11149#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11150#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11151pub fn _mm512_mask_cvtpd_ps(src: __m256, k: __mmask8, a: __m512d) -> __m256 {
11152 unsafe {
11153 transmute(src:vcvtpd2ps(
11154 a.as_f64x8(),
11155 src.as_f32x8(),
11156 mask:k,
11157 _MM_FROUND_CUR_DIRECTION,
11158 ))
11159 }
11160}
11161
11162/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11163///
11164/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_ps&expand=1714)
11165#[inline]
11166#[target_feature(enable = "avx512f")]
11167#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11168#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11169pub fn _mm512_maskz_cvtpd_ps(k: __mmask8, a: __m512d) -> __m256 {
11170 unsafe {
11171 transmute(src:vcvtpd2ps(
11172 a.as_f64x8(),
11173 src:f32x8::ZERO,
11174 mask:k,
11175 _MM_FROUND_CUR_DIRECTION,
11176 ))
11177 }
11178}
11179
11180/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11181///
11182/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_ps&expand=1710)
11183#[inline]
11184#[target_feature(enable = "avx512f,avx512vl")]
11185#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11186#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11187pub fn _mm256_mask_cvtpd_ps(src: __m128, k: __mmask8, a: __m256d) -> __m128 {
11188 unsafe {
11189 let convert: __m128 = _mm256_cvtpd_ps(a);
11190 transmute(src:simd_select_bitmask(m:k, yes:convert.as_f32x4(), no:src.as_f32x4()))
11191 }
11192}
11193
11194/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11195///
11196/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_ps&expand=1711)
11197#[inline]
11198#[target_feature(enable = "avx512f,avx512vl")]
11199#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11200#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11201pub fn _mm256_maskz_cvtpd_ps(k: __mmask8, a: __m256d) -> __m128 {
11202 unsafe {
11203 let convert: __m128 = _mm256_cvtpd_ps(a);
11204 transmute(src:simd_select_bitmask(m:k, yes:convert.as_f32x4(), no:f32x4::ZERO))
11205 }
11206}
11207
11208/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11209///
11210/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_ps&expand=1707)
11211#[inline]
11212#[target_feature(enable = "avx512f,avx512vl")]
11213#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11214#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11215pub fn _mm_mask_cvtpd_ps(src: __m128, k: __mmask8, a: __m128d) -> __m128 {
11216 unsafe {
11217 let convert: __m128 = _mm_cvtpd_ps(a);
11218 transmute(src:simd_select_bitmask(m:k, yes:convert.as_f32x4(), no:src.as_f32x4()))
11219 }
11220}
11221
11222/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11223///
11224/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_ps&expand=1708)
11225#[inline]
11226#[target_feature(enable = "avx512f,avx512vl")]
11227#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11228#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11229pub fn _mm_maskz_cvtpd_ps(k: __mmask8, a: __m128d) -> __m128 {
11230 unsafe {
11231 let convert: __m128 = _mm_cvtpd_ps(a);
11232 transmute(src:simd_select_bitmask(m:k, yes:convert.as_f32x4(), no:f32x4::ZERO))
11233 }
11234}
11235
11236/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.
11237///
11238/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_epi32&expand=1675)
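///
/// A minimal usage sketch (illustrative only; the helper name is hypothetical):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn round_to_i32(v: __m512d) -> __m256i {
///     // Values are rounded according to the current MXCSR rounding mode
///     // (round-to-nearest-even unless it has been changed).
///     _mm512_cvtpd_epi32(v)
/// }
/// ```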
11239#[inline]
11240#[target_feature(enable = "avx512f")]
11241#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11242#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11243pub fn _mm512_cvtpd_epi32(a: __m512d) -> __m256i {
11244 unsafe {
11245 transmute(src:vcvtpd2dq(
11246 a.as_f64x8(),
11247 src:i32x8::ZERO,
11248 mask:0b11111111,
11249 _MM_FROUND_CUR_DIRECTION,
11250 ))
11251 }
11252}
11253
11254/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11255///
11256/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_epi32&expand=1676)
11257#[inline]
11258#[target_feature(enable = "avx512f")]
11259#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11260#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11261pub fn _mm512_mask_cvtpd_epi32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
11262 unsafe {
11263 transmute(src:vcvtpd2dq(
11264 a.as_f64x8(),
11265 src.as_i32x8(),
11266 mask:k,
11267 _MM_FROUND_CUR_DIRECTION,
11268 ))
11269 }
11270}
11271
11272/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11273///
11274/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_epi32&expand=1677)
11275#[inline]
11276#[target_feature(enable = "avx512f")]
11277#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11278#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11279pub fn _mm512_maskz_cvtpd_epi32(k: __mmask8, a: __m512d) -> __m256i {
11280 unsafe {
11281 transmute(src:vcvtpd2dq(
11282 a.as_f64x8(),
11283 src:i32x8::ZERO,
11284 mask:k,
11285 _MM_FROUND_CUR_DIRECTION,
11286 ))
11287 }
11288}
11289
11290/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11291///
11292/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_epi32&expand=1673)
11293#[inline]
11294#[target_feature(enable = "avx512f,avx512vl")]
11295#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11296#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11297pub fn _mm256_mask_cvtpd_epi32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
11298 unsafe {
11299 let convert: __m128i = _mm256_cvtpd_epi32(a);
11300 transmute(src:simd_select_bitmask(m:k, yes:convert.as_i32x4(), no:src.as_i32x4()))
11301 }
11302}
11303
11304/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11305///
11306/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_epi32&expand=1674)
11307#[inline]
11308#[target_feature(enable = "avx512f,avx512vl")]
11309#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11310#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11311pub fn _mm256_maskz_cvtpd_epi32(k: __mmask8, a: __m256d) -> __m128i {
11312 unsafe {
11313 let convert: __m128i = _mm256_cvtpd_epi32(a);
11314 transmute(src:simd_select_bitmask(m:k, yes:convert.as_i32x4(), no:i32x4::ZERO))
11315 }
11316}
11317
11318/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11319///
11320/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_epi32&expand=1670)
11321#[inline]
11322#[target_feature(enable = "avx512f,avx512vl")]
11323#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11324#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11325pub fn _mm_mask_cvtpd_epi32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
11326 unsafe {
11327 let convert: __m128i = _mm_cvtpd_epi32(a);
11328 transmute(src:simd_select_bitmask(m:k, yes:convert.as_i32x4(), no:src.as_i32x4()))
11329 }
11330}
11331
11332/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11333///
11334/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_epi32&expand=1671)
11335#[inline]
11336#[target_feature(enable = "avx512f,avx512vl")]
11337#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11338#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11339pub fn _mm_maskz_cvtpd_epi32(k: __mmask8, a: __m128d) -> __m128i {
11340 unsafe {
11341 let convert: __m128i = _mm_cvtpd_epi32(a);
11342 transmute(src:simd_select_bitmask(m:k, yes:convert.as_i32x4(), no:i32x4::ZERO))
11343 }
11344}
11345
11346/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
11347///
11348/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_epu32&expand=1693)
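///
/// A minimal usage sketch (illustrative only; the helper name is hypothetical):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn round_to_u32(v: __m512d) -> __m256i {
///     // Like the signed conversion above, but the eight results are unsigned 32-bit lanes.
///     _mm512_cvtpd_epu32(v)
/// }
/// ```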
11349#[inline]
11350#[target_feature(enable = "avx512f")]
11351#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11352#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11353pub fn _mm512_cvtpd_epu32(a: __m512d) -> __m256i {
11354 unsafe {
11355 transmute(src:vcvtpd2udq(
11356 a.as_f64x8(),
11357 src:u32x8::ZERO,
11358 mask:0b11111111,
11359 _MM_FROUND_CUR_DIRECTION,
11360 ))
11361 }
11362}
11363
11364/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11365///
11366/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_epu32&expand=1694)
11367#[inline]
11368#[target_feature(enable = "avx512f")]
11369#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11370#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11371pub fn _mm512_mask_cvtpd_epu32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
11372 unsafe {
11373 transmute(src:vcvtpd2udq(
11374 a.as_f64x8(),
11375 src.as_u32x8(),
11376 mask:k,
11377 _MM_FROUND_CUR_DIRECTION,
11378 ))
11379 }
11380}
11381
11382/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11383///
11384/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_epu32&expand=1695)
11385#[inline]
11386#[target_feature(enable = "avx512f")]
11387#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11388#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11389pub fn _mm512_maskz_cvtpd_epu32(k: __mmask8, a: __m512d) -> __m256i {
11390 unsafe {
11391 transmute(src:vcvtpd2udq(
11392 a.as_f64x8(),
11393 src:u32x8::ZERO,
11394 mask:k,
11395 _MM_FROUND_CUR_DIRECTION,
11396 ))
11397 }
11398}
11399
11400/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
11401///
11402/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtpd_epu32&expand=1690)
11403#[inline]
11404#[target_feature(enable = "avx512f,avx512vl")]
11405#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11406#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11407pub fn _mm256_cvtpd_epu32(a: __m256d) -> __m128i {
11408 unsafe { transmute(src:vcvtpd2udq256(a.as_f64x4(), src:u32x4::ZERO, mask:0b11111111)) }
11409}
11410
11411/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11412///
11413/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_epu32&expand=1691)
11414#[inline]
11415#[target_feature(enable = "avx512f,avx512vl")]
11416#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11417#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11418pub fn _mm256_mask_cvtpd_epu32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
11419 unsafe { transmute(src:vcvtpd2udq256(a.as_f64x4(), src.as_u32x4(), mask:k)) }
11420}
11421
11422/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11423///
11424/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_epu32&expand=1692)
11425#[inline]
11426#[target_feature(enable = "avx512f,avx512vl")]
11427#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11428#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11429pub fn _mm256_maskz_cvtpd_epu32(k: __mmask8, a: __m256d) -> __m128i {
11430 unsafe { transmute(src:vcvtpd2udq256(a.as_f64x4(), src:u32x4::ZERO, mask:k)) }
11431}
11432
11433/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
11434///
11435/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpd_epu32&expand=1687)
11436#[inline]
11437#[target_feature(enable = "avx512f,avx512vl")]
11438#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11439#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11440pub fn _mm_cvtpd_epu32(a: __m128d) -> __m128i {
11441 unsafe { transmute(src:vcvtpd2udq128(a.as_f64x2(), src:u32x4::ZERO, mask:0b11111111)) }
11442}
11443
11444/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11445///
11446/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_epu32&expand=1688)
11447#[inline]
11448#[target_feature(enable = "avx512f,avx512vl")]
11449#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11450#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11451pub fn _mm_mask_cvtpd_epu32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
11452 unsafe { transmute(src:vcvtpd2udq128(a.as_f64x2(), src.as_u32x4(), mask:k)) }
11453}
11454
11455/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11456///
11457/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_epu32&expand=1689)
11458#[inline]
11459#[target_feature(enable = "avx512f,avx512vl")]
11460#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11461#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11462pub fn _mm_maskz_cvtpd_epu32(k: __mmask8, a: __m128d) -> __m128i {
11463 unsafe { transmute(src:vcvtpd2udq128(a.as_f64x2(), src:u32x4::ZERO, mask:k)) }
11464}
11465
11466/// Performs an element-by-element conversion of packed double-precision (64-bit) floating-point elements in v2 to single-precision (32-bit) floating-point elements and stores them in dst. The elements are stored in the lower half of the results vector, while the remaining upper half locations are set to 0.
11467///
11468/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_pslo&expand=1715)
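///
/// A minimal usage sketch (illustrative only; the helper name is hypothetical):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn narrow_into_low_half(v: __m512d) -> __m512 {
///     // The eight converted f32 values land in lanes 0..7; lanes 8..15 are zero.
///     _mm512_cvtpd_pslo(v)
/// }
/// ```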
11469#[inline]
11470#[target_feature(enable = "avx512f")]
11471#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11472#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11473pub fn _mm512_cvtpd_pslo(v2: __m512d) -> __m512 {
11474 unsafe {
11475 let r: f32x8 = vcvtpd2ps(
11476 a:v2.as_f64x8(),
11477 src:f32x8::ZERO,
11478 mask:0b11111111,
11479 _MM_FROUND_CUR_DIRECTION,
11480 );
11481 simd_shuffle!(
11482 r,
11483 f32x8::ZERO,
11484 [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
11485 )
11486 }
11487}
11488
11489/// Performs an element-by-element conversion of packed double-precision (64-bit) floating-point elements in v2 to single-precision (32-bit) floating-point elements and stores them in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The elements are stored in the lower half of the results vector, while the remaining upper half locations are set to 0.
11490///
11491/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_pslo&expand=1716)
11492#[inline]
11493#[target_feature(enable = "avx512f")]
11494#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11495#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11496pub fn _mm512_mask_cvtpd_pslo(src: __m512, k: __mmask8, v2: __m512d) -> __m512 {
11497 unsafe {
11498 let r: f32x8 = vcvtpd2ps(
11499 a:v2.as_f64x8(),
11500 src:_mm512_castps512_ps256(src).as_f32x8(),
11501 mask:k,
11502 _MM_FROUND_CUR_DIRECTION,
11503 );
11504 simd_shuffle!(
11505 r,
11506 f32x8::ZERO,
11507 [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
11508 )
11509 }
11510}
11511
11512/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst.
11513///
11514/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi8_epi32&expand=1535)
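///
/// A minimal usage sketch (illustrative only; the helper name is hypothetical):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn widen_signed_bytes(v: __m128i) -> __m512i {
///     // Each of the 16 signed bytes becomes a sign-extended 32-bit lane.
///     _mm512_cvtepi8_epi32(v)
/// }
/// ```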
11515#[inline]
11516#[target_feature(enable = "avx512f")]
11517#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11518#[cfg_attr(test, assert_instr(vpmovsxbd))]
11519pub fn _mm512_cvtepi8_epi32(a: __m128i) -> __m512i {
11520 unsafe {
11521 let a: i8x16 = a.as_i8x16();
11522 transmute::<i32x16, _>(src:simd_cast(a))
11523 }
11524}
11525
11526/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11527///
11528/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi8_epi32&expand=1536)
11529#[inline]
11530#[target_feature(enable = "avx512f")]
11531#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11532#[cfg_attr(test, assert_instr(vpmovsxbd))]
11533pub fn _mm512_mask_cvtepi8_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
11534 unsafe {
11535 let convert: i32x16 = _mm512_cvtepi8_epi32(a).as_i32x16();
11536 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i32x16()))
11537 }
11538}
11539
11540/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11541///
11542/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi8_epi32&expand=1537)
11543#[inline]
11544#[target_feature(enable = "avx512f")]
11545#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11546#[cfg_attr(test, assert_instr(vpmovsxbd))]
11547pub fn _mm512_maskz_cvtepi8_epi32(k: __mmask16, a: __m128i) -> __m512i {
11548 unsafe {
11549 let convert: i32x16 = _mm512_cvtepi8_epi32(a).as_i32x16();
11550 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i32x16::ZERO))
11551 }
11552}
11553
11554/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11555///
11556/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi8_epi32&expand=1533)
11557#[inline]
11558#[target_feature(enable = "avx512f,avx512vl")]
11559#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11560#[cfg_attr(test, assert_instr(vpmovsxbd))]
11561pub fn _mm256_mask_cvtepi8_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
11562 unsafe {
11563 let convert: i32x8 = _mm256_cvtepi8_epi32(a).as_i32x8();
11564 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i32x8()))
11565 }
11566}
11567
11568/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11569///
11570/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi8_epi32&expand=1534)
11571#[inline]
11572#[target_feature(enable = "avx512f,avx512vl")]
11573#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11574#[cfg_attr(test, assert_instr(vpmovsxbd))]
11575pub fn _mm256_maskz_cvtepi8_epi32(k: __mmask8, a: __m128i) -> __m256i {
11576 unsafe {
11577 let convert: i32x8 = _mm256_cvtepi8_epi32(a).as_i32x8();
11578 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i32x8::ZERO))
11579 }
11580}
11581
11582/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11583///
11584/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi8_epi32&expand=1530)
11585#[inline]
11586#[target_feature(enable = "avx512f,avx512vl")]
11587#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11588#[cfg_attr(test, assert_instr(vpmovsxbd))]
11589pub fn _mm_mask_cvtepi8_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11590 unsafe {
11591 let convert: i32x4 = _mm_cvtepi8_epi32(a).as_i32x4();
11592 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i32x4()))
11593 }
11594}
11595
11596/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11597///
11598/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi8_epi32&expand=1531)
11599#[inline]
11600#[target_feature(enable = "avx512f,avx512vl")]
11601#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11602#[cfg_attr(test, assert_instr(vpmovsxbd))]
11603pub fn _mm_maskz_cvtepi8_epi32(k: __mmask8, a: __m128i) -> __m128i {
11604 unsafe {
11605 let convert: i32x4 = _mm_cvtepi8_epi32(a).as_i32x4();
11606 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i32x4::ZERO))
11607 }
11608}
11609
11610/// Sign extend packed 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst.
11611///
11612/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi8_epi64&expand=1544)
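///
/// A minimal usage sketch (illustrative only; the helper name is hypothetical):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn widen_low_signed_bytes(v: __m128i) -> __m512i {
///     // Only the low 8 bytes of `v` are used; each becomes a sign-extended 64-bit lane.
///     _mm512_cvtepi8_epi64(v)
/// }
/// ```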
11613#[inline]
11614#[target_feature(enable = "avx512f")]
11615#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11616#[cfg_attr(test, assert_instr(vpmovsxbq))]
11617pub fn _mm512_cvtepi8_epi64(a: __m128i) -> __m512i {
11618 unsafe {
11619 let a: i8x16 = a.as_i8x16();
11620 let v64: i8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
11621 transmute::<i64x8, _>(src:simd_cast(v64))
11622 }
11623}
11624
11625/// Sign extend packed 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11626///
11627/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi8_epi64&expand=1545)
11628#[inline]
11629#[target_feature(enable = "avx512f")]
11630#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11631#[cfg_attr(test, assert_instr(vpmovsxbq))]
11632pub fn _mm512_mask_cvtepi8_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
11633 unsafe {
11634 let convert: i64x8 = _mm512_cvtepi8_epi64(a).as_i64x8();
11635 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i64x8()))
11636 }
11637}
11638
11639/// Sign extend packed 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11640///
11641/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi8_epi64&expand=1546)
11642#[inline]
11643#[target_feature(enable = "avx512f")]
11644#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11645#[cfg_attr(test, assert_instr(vpmovsxbq))]
11646pub fn _mm512_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m512i {
11647 unsafe {
11648 let convert: i64x8 = _mm512_cvtepi8_epi64(a).as_i64x8();
11649 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i64x8::ZERO))
11650 }
11651}
11652
11653/// Sign extend packed 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11654///
11655/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi8_epi64&expand=1542)
11656#[inline]
11657#[target_feature(enable = "avx512f,avx512vl")]
11658#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11659#[cfg_attr(test, assert_instr(vpmovsxbq))]
11660pub fn _mm256_mask_cvtepi8_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
11661 unsafe {
11662 let convert: i64x4 = _mm256_cvtepi8_epi64(a).as_i64x4();
11663 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i64x4()))
11664 }
11665}
11666
11667/// Sign extend packed 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11668///
11669/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi8_epi64&expand=1543)
11670#[inline]
11671#[target_feature(enable = "avx512f,avx512vl")]
11672#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11673#[cfg_attr(test, assert_instr(vpmovsxbq))]
11674pub fn _mm256_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m256i {
11675 unsafe {
11676 let convert: i64x4 = _mm256_cvtepi8_epi64(a).as_i64x4();
11677 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i64x4::ZERO))
11678 }
11679}
11680
11681/// Sign extend packed 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11682///
11683/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi8_epi64&expand=1539)
11684#[inline]
11685#[target_feature(enable = "avx512f,avx512vl")]
11686#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11687#[cfg_attr(test, assert_instr(vpmovsxbq))]
11688pub fn _mm_mask_cvtepi8_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11689 unsafe {
11690 let convert: i64x2 = _mm_cvtepi8_epi64(a).as_i64x2();
11691 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i64x2()))
11692 }
11693}
11694
11695/// Sign extend packed 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11696///
11697/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi8_epi64&expand=1540)
11698#[inline]
11699#[target_feature(enable = "avx512f,avx512vl")]
11700#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11701#[cfg_attr(test, assert_instr(vpmovsxbq))]
11702pub fn _mm_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m128i {
11703 unsafe {
11704 let convert: i64x2 = _mm_cvtepi8_epi64(a).as_i64x2();
11705 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i64x2::ZERO))
11706 }
11707}
11708
11709/// Zero extend packed unsigned 8-bit integers in a to packed 32-bit integers, and store the results in dst.
11710///
11711/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu8_epi32&expand=1621)
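///
/// A minimal usage sketch (illustrative only; the helper name is hypothetical):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn widen_unsigned_bytes(v: __m128i) -> __m512i {
///     // Each of the 16 bytes is zero-extended, so every 32-bit lane is in 0..=255.
///     _mm512_cvtepu8_epi32(v)
/// }
/// ```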
11712#[inline]
11713#[target_feature(enable = "avx512f")]
11714#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11715#[cfg_attr(test, assert_instr(vpmovzxbd))]
11716pub fn _mm512_cvtepu8_epi32(a: __m128i) -> __m512i {
11717 unsafe {
11718 let a: u8x16 = a.as_u8x16();
11719 transmute::<i32x16, _>(src:simd_cast(a))
11720 }
11721}
11722
11723/// Zero extend packed unsigned 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11724///
11725/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu8_epi32&expand=1622)
11726#[inline]
11727#[target_feature(enable = "avx512f")]
11728#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11729#[cfg_attr(test, assert_instr(vpmovzxbd))]
11730pub fn _mm512_mask_cvtepu8_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
11731 unsafe {
11732 let convert: i32x16 = _mm512_cvtepu8_epi32(a).as_i32x16();
11733 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i32x16()))
11734 }
11735}
11736
11737/// Zero extend packed unsigned 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11738///
11739/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu8_epi32&expand=1623)
11740#[inline]
11741#[target_feature(enable = "avx512f")]
11742#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11743#[cfg_attr(test, assert_instr(vpmovzxbd))]
11744pub fn _mm512_maskz_cvtepu8_epi32(k: __mmask16, a: __m128i) -> __m512i {
11745 unsafe {
11746 let convert: i32x16 = _mm512_cvtepu8_epi32(a).as_i32x16();
11747 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i32x16::ZERO))
11748 }
11749}
11750
11751/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11752///
11753/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu8_epi32&expand=1619)
11754#[inline]
11755#[target_feature(enable = "avx512f,avx512vl")]
11756#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11757#[cfg_attr(test, assert_instr(vpmovzxbd))]
11758pub fn _mm256_mask_cvtepu8_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
11759 unsafe {
11760 let convert: i32x8 = _mm256_cvtepu8_epi32(a).as_i32x8();
11761 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i32x8()))
11762 }
11763}
11764
11765/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11766///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu8_epi32&expand=1620)
11768#[inline]
11769#[target_feature(enable = "avx512f,avx512vl")]
11770#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11771#[cfg_attr(test, assert_instr(vpmovzxbd))]
11772pub fn _mm256_maskz_cvtepu8_epi32(k: __mmask8, a: __m128i) -> __m256i {
11773 unsafe {
11774 let convert: i32x8 = _mm256_cvtepu8_epi32(a).as_i32x8();
11775 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i32x8::ZERO))
11776 }
11777}
11778
11779/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11780///
11781/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu8_epi32&expand=1616)
11782#[inline]
11783#[target_feature(enable = "avx512f,avx512vl")]
11784#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11785#[cfg_attr(test, assert_instr(vpmovzxbd))]
11786pub fn _mm_mask_cvtepu8_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11787 unsafe {
11788 let convert: i32x4 = _mm_cvtepu8_epi32(a).as_i32x4();
11789 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i32x4()))
11790 }
11791}
11792
/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu8_epi32&expand=1617)
11796#[inline]
11797#[target_feature(enable = "avx512f,avx512vl")]
11798#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11799#[cfg_attr(test, assert_instr(vpmovzxbd))]
11800pub fn _mm_maskz_cvtepu8_epi32(k: __mmask8, a: __m128i) -> __m128i {
11801 unsafe {
11802 let convert: i32x4 = _mm_cvtepu8_epi32(a).as_i32x4();
11803 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i32x4::ZERO))
11804 }
11805}
11806
/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst.
11808///
11809/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu8_epi64&expand=1630)
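///
/// A minimal usage sketch (illustrative only; the helper name is hypothetical):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn widen_low_unsigned_bytes(v: __m128i) -> __m512i {
///     // Only the low 8 bytes of `v` are used; each is zero-extended to a 64-bit lane.
///     _mm512_cvtepu8_epi64(v)
/// }
/// ```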
11810#[inline]
11811#[target_feature(enable = "avx512f")]
11812#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11813#[cfg_attr(test, assert_instr(vpmovzxbq))]
11814pub fn _mm512_cvtepu8_epi64(a: __m128i) -> __m512i {
11815 unsafe {
11816 let a: u8x16 = a.as_u8x16();
11817 let v64: u8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
11818 transmute::<i64x8, _>(src:simd_cast(v64))
11819 }
11820}
11821
11822/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11823///
11824/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu8_epi64&expand=1631)
11825#[inline]
11826#[target_feature(enable = "avx512f")]
11827#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11828#[cfg_attr(test, assert_instr(vpmovzxbq))]
11829pub fn _mm512_mask_cvtepu8_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
11830 unsafe {
11831 let convert: i64x8 = _mm512_cvtepu8_epi64(a).as_i64x8();
11832 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i64x8()))
11833 }
11834}
11835
11836/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11837///
11838/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu8_epi64&expand=1632)
11839#[inline]
11840#[target_feature(enable = "avx512f")]
11841#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11842#[cfg_attr(test, assert_instr(vpmovzxbq))]
11843pub fn _mm512_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m512i {
11844 unsafe {
11845 let convert: i64x8 = _mm512_cvtepu8_epi64(a).as_i64x8();
11846 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i64x8::ZERO))
11847 }
11848}
11849
11850/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11851///
11852/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu8_epi64&expand=1628)
11853#[inline]
11854#[target_feature(enable = "avx512f,avx512vl")]
11855#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11856#[cfg_attr(test, assert_instr(vpmovzxbq))]
11857pub fn _mm256_mask_cvtepu8_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
11858 unsafe {
11859 let convert: i64x4 = _mm256_cvtepu8_epi64(a).as_i64x4();
11860 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i64x4()))
11861 }
11862}
11863
11864/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11865///
11866/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu8_epi64&expand=1629)
11867#[inline]
11868#[target_feature(enable = "avx512f,avx512vl")]
11869#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11870#[cfg_attr(test, assert_instr(vpmovzxbq))]
11871pub fn _mm256_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m256i {
11872 unsafe {
11873 let convert: i64x4 = _mm256_cvtepu8_epi64(a).as_i64x4();
11874 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i64x4::ZERO))
11875 }
11876}
11877
11878/// Zero extend packed unsigned 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11879///
11880/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu8_epi64&expand=1625)
11881#[inline]
11882#[target_feature(enable = "avx512f,avx512vl")]
11883#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11884#[cfg_attr(test, assert_instr(vpmovzxbq))]
11885pub fn _mm_mask_cvtepu8_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11886 unsafe {
11887 let convert: i64x2 = _mm_cvtepu8_epi64(a).as_i64x2();
11888 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i64x2()))
11889 }
11890}
11891
11892/// Zero extend packed unsigned 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11893///
11894/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu8_epi64&expand=1626)
11895#[inline]
11896#[target_feature(enable = "avx512f,avx512vl")]
11897#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11898#[cfg_attr(test, assert_instr(vpmovzxbq))]
11899pub fn _mm_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m128i {
11900 unsafe {
11901 let convert: i64x2 = _mm_cvtepu8_epi64(a).as_i64x2();
11902 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i64x2::ZERO))
11903 }
11904}
11905
11906/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst.
11907///
11908/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi16_epi32&expand=1389)
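///
/// A minimal usage sketch (illustrative only; the helper name is hypothetical):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn widen_signed_words(v: __m256i) -> __m512i {
///     // Each of the 16 signed 16-bit lanes becomes a sign-extended 32-bit lane.
///     _mm512_cvtepi16_epi32(v)
/// }
/// ```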
11909#[inline]
11910#[target_feature(enable = "avx512f")]
11911#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11912#[cfg_attr(test, assert_instr(vpmovsxwd))]
11913pub fn _mm512_cvtepi16_epi32(a: __m256i) -> __m512i {
11914 unsafe {
11915 let a: i16x16 = a.as_i16x16();
11916 transmute::<i32x16, _>(src:simd_cast(a))
11917 }
11918}
11919
11920/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11921///
11922/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi16_epi32&expand=1390)
11923#[inline]
11924#[target_feature(enable = "avx512f")]
11925#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11926#[cfg_attr(test, assert_instr(vpmovsxwd))]
11927pub fn _mm512_mask_cvtepi16_epi32(src: __m512i, k: __mmask16, a: __m256i) -> __m512i {
11928 unsafe {
11929 let convert: i32x16 = _mm512_cvtepi16_epi32(a).as_i32x16();
11930 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i32x16()))
11931 }
11932}
11933
11934/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11935///
11936/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi16_epi32&expand=1391)
11937#[inline]
11938#[target_feature(enable = "avx512f")]
11939#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11940#[cfg_attr(test, assert_instr(vpmovsxwd))]
11941pub fn _mm512_maskz_cvtepi16_epi32(k: __mmask16, a: __m256i) -> __m512i {
11942 unsafe {
11943 let convert: i32x16 = _mm512_cvtepi16_epi32(a).as_i32x16();
11944 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i32x16::ZERO))
11945 }
11946}
11947
11948/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11949///
11950/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi16_epi32&expand=1387)
11951#[inline]
11952#[target_feature(enable = "avx512f,avx512vl")]
11953#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11954#[cfg_attr(test, assert_instr(vpmovsxwd))]
11955pub fn _mm256_mask_cvtepi16_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
11956 unsafe {
11957 let convert: i32x8 = _mm256_cvtepi16_epi32(a).as_i32x8();
11958 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i32x8()))
11959 }
11960}
11961
11962/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11963///
11964/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi16_epi32&expand=1388)
11965#[inline]
11966#[target_feature(enable = "avx512f,avx512vl")]
11967#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11968#[cfg_attr(test, assert_instr(vpmovsxwd))]
11969pub fn _mm256_maskz_cvtepi16_epi32(k: __mmask8, a: __m128i) -> __m256i {
11970 unsafe {
11971 let convert: i32x8 = _mm256_cvtepi16_epi32(a).as_i32x8();
11972 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i32x8::ZERO))
11973 }
11974}
11975
11976/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11977///
11978/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi16_epi32&expand=1384)
11979#[inline]
11980#[target_feature(enable = "avx512f,avx512vl")]
11981#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11982#[cfg_attr(test, assert_instr(vpmovsxwd))]
11983pub fn _mm_mask_cvtepi16_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11984 unsafe {
11985 let convert: i32x4 = _mm_cvtepi16_epi32(a).as_i32x4();
11986 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i32x4()))
11987 }
11988}
11989
11990/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11991///
11992/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi16_epi32&expand=1385)
11993#[inline]
11994#[target_feature(enable = "avx512f,avx512vl")]
11995#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11996#[cfg_attr(test, assert_instr(vpmovsxwd))]
11997pub fn _mm_maskz_cvtepi16_epi32(k: __mmask8, a: __m128i) -> __m128i {
11998 unsafe {
11999 let convert: i32x4 = _mm_cvtepi16_epi32(a).as_i32x4();
12000 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i32x4::ZERO))
12001 }
12002}
12003
12004/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst.
12005///
12006/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi16_epi64&expand=1398)
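///
/// A minimal usage sketch (illustrative only; the helper name is hypothetical):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn widen_words_to_i64(v: __m128i) -> __m512i {
///     // All eight 16-bit lanes of `v` are sign-extended to 64-bit lanes.
///     _mm512_cvtepi16_epi64(v)
/// }
/// ```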
12007#[inline]
12008#[target_feature(enable = "avx512f")]
12009#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12010#[cfg_attr(test, assert_instr(vpmovsxwq))]
12011pub fn _mm512_cvtepi16_epi64(a: __m128i) -> __m512i {
12012 unsafe {
12013 let a: i16x8 = a.as_i16x8();
12014 transmute::<i64x8, _>(src:simd_cast(a))
12015 }
12016}
12017
12018/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12019///
12020/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi16_epi64&expand=1399)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxwq))]
pub fn _mm512_mask_cvtepi16_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
    unsafe {
        let convert: i64x8 = _mm512_cvtepi16_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
    }
}

/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi16_epi64&expand=1400)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxwq))]
pub fn _mm512_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m512i {
    unsafe {
        let convert: i64x8 = _mm512_cvtepi16_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
    }
}

/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi16_epi64&expand=1396)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxwq))]
pub fn _mm256_mask_cvtepi16_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let convert: i64x4 = _mm256_cvtepi16_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
    }
}

/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi16_epi64&expand=1397)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxwq))]
pub fn _mm256_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let convert: i64x4 = _mm256_cvtepi16_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
    }
}

/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi16_epi64&expand=1393)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxwq))]
pub fn _mm_mask_cvtepi16_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert: i64x2 = _mm_cvtepi16_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
    }
}

/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi16_epi64&expand=1394)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxwq))]
pub fn _mm_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert: i64x2 = _mm_cvtepi16_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
    }
}

/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu16_epi32&expand=1553)
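///
/// A minimal illustrative sketch (not compiled as a doctest; assumes an
/// AVX-512F-capable CPU): the source lanes are treated as unsigned, so
/// `0xFFFF` widens to `65535` rather than `-1`.
///
/// ```ignore
/// let a = _mm256_set1_epi16(-1); // every lane is 0xFFFF
/// let r = _mm512_cvtepu16_epi32(a);
/// // r == _mm512_set1_epi32(65535)
/// ```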
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxwd))]
pub fn _mm512_cvtepu16_epi32(a: __m256i) -> __m512i {
    unsafe {
        let a: u16x16 = a.as_u16x16();
        transmute::<i32x16, _>(simd_cast(a))
    }
}

/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu16_epi32&expand=1554)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxwd))]
pub fn _mm512_mask_cvtepu16_epi32(src: __m512i, k: __mmask16, a: __m256i) -> __m512i {
    unsafe {
        let convert: i32x16 = _mm512_cvtepu16_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
    }
}

/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu16_epi32&expand=1555)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxwd))]
pub fn _mm512_maskz_cvtepu16_epi32(k: __mmask16, a: __m256i) -> __m512i {
    unsafe {
        let convert: i32x16 = _mm512_cvtepu16_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, convert, i32x16::ZERO))
    }
}

/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu16_epi32&expand=1551)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxwd))]
pub fn _mm256_mask_cvtepu16_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let convert: i32x8 = _mm256_cvtepu16_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
    }
}

/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu16_epi32&expand=1552)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxwd))]
pub fn _mm256_maskz_cvtepu16_epi32(k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let convert: i32x8 = _mm256_cvtepu16_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, convert, i32x8::ZERO))
    }
}

/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu16_epi32&expand=1548)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxwd))]
pub fn _mm_mask_cvtepu16_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert: i32x4 = _mm_cvtepu16_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
    }
}

/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu16_epi32&expand=1549)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxwd))]
pub fn _mm_maskz_cvtepu16_epi32(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert: i32x4 = _mm_cvtepu16_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, convert, i32x4::ZERO))
    }
}

/// Zero extend packed unsigned 16-bit integers in a to packed 64-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu16_epi64&expand=1562)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxwq))]
pub fn _mm512_cvtepu16_epi64(a: __m128i) -> __m512i {
    unsafe {
        let a: u16x8 = a.as_u16x8();
        transmute::<i64x8, _>(simd_cast(a))
    }
}

/// Zero extend packed unsigned 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu16_epi64&expand=1563)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxwq))]
pub fn _mm512_mask_cvtepu16_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
    unsafe {
        let convert: i64x8 = _mm512_cvtepu16_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
    }
}

/// Zero extend packed unsigned 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu16_epi64&expand=1564)
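///
/// A minimal illustrative sketch of the zeromask behavior (not compiled as a
/// doctest; assumes an AVX-512F-capable CPU): elements whose mask bit is clear
/// are zeroed instead of being copied from a source vector.
///
/// ```ignore
/// let a = _mm_set1_epi16(5);
/// // only mask bits 0 and 1 are set, so elements 2..=7 become zero
/// let r = _mm512_maskz_cvtepu16_epi64(0b0000_0011, a);
/// // r == _mm512_set_epi64(0, 0, 0, 0, 0, 0, 5, 5)
/// ```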
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxwq))]
pub fn _mm512_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m512i {
    unsafe {
        let convert: i64x8 = _mm512_cvtepu16_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
    }
}

/// Zero extend packed unsigned 16-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu16_epi64&expand=1560)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxwq))]
pub fn _mm256_mask_cvtepu16_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let convert: i64x4 = _mm256_cvtepu16_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
    }
}

/// Zero extend packed unsigned 16-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu16_epi64&expand=1561)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxwq))]
pub fn _mm256_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let convert: i64x4 = _mm256_cvtepu16_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
    }
}

/// Zero extend packed unsigned 16-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu16_epi64&expand=1557)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxwq))]
pub fn _mm_mask_cvtepu16_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert: i64x2 = _mm_cvtepu16_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
    }
}

/// Zero extend packed unsigned 16-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu16_epi64&expand=1558)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxwq))]
pub fn _mm_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert: i64x2 = _mm_cvtepu16_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
    }
}

/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_epi64&expand=1428)
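///
/// A minimal illustrative sketch (not compiled as a doctest; assumes an
/// AVX-512F-capable CPU): sign extension preserves negative values.
///
/// ```ignore
/// let a = _mm256_set1_epi32(-1);
/// let r = _mm512_cvtepi32_epi64(a);
/// // r == _mm512_set1_epi64(-1)
/// ```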
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxdq))]
pub fn _mm512_cvtepi32_epi64(a: __m256i) -> __m512i {
    unsafe {
        let a: i32x8 = a.as_i32x8();
        transmute::<i64x8, _>(simd_cast(a))
    }
}

/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_epi64&expand=1429)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxdq))]
pub fn _mm512_mask_cvtepi32_epi64(src: __m512i, k: __mmask8, a: __m256i) -> __m512i {
    unsafe {
        let convert: i64x8 = _mm512_cvtepi32_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
    }
}

/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_epi64&expand=1430)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxdq))]
pub fn _mm512_maskz_cvtepi32_epi64(k: __mmask8, a: __m256i) -> __m512i {
    unsafe {
        let convert: i64x8 = _mm512_cvtepi32_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
    }
}

/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_epi64&expand=1426)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxdq))]
pub fn _mm256_mask_cvtepi32_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let convert: i64x4 = _mm256_cvtepi32_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
    }
}

/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_epi64&expand=1427)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxdq))]
pub fn _mm256_maskz_cvtepi32_epi64(k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let convert: i64x4 = _mm256_cvtepi32_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
    }
}

/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_epi64&expand=1423)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxdq))]
pub fn _mm_mask_cvtepi32_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert: i64x2 = _mm_cvtepi32_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
    }
}

/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_epi64&expand=1424)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxdq))]
pub fn _mm_maskz_cvtepi32_epi64(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert: i64x2 = _mm_cvtepi32_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
    }
}

/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32_epi64&expand=1571)
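///
/// A minimal illustrative sketch (not compiled as a doctest; assumes an
/// AVX-512F-capable CPU): the source lanes are treated as unsigned, so the
/// bit pattern 0xFFFF_FFFF widens to 4294967295 rather than -1.
///
/// ```ignore
/// let a = _mm256_set1_epi32(-1); // every lane is 0xFFFF_FFFF
/// let r = _mm512_cvtepu32_epi64(a);
/// // r == _mm512_set1_epi64(0xFFFF_FFFF)
/// ```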
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxdq))]
pub fn _mm512_cvtepu32_epi64(a: __m256i) -> __m512i {
    unsafe {
        let a: u32x8 = a.as_u32x8();
        transmute::<i64x8, _>(simd_cast(a))
    }
}

/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32_epi64&expand=1572)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxdq))]
pub fn _mm512_mask_cvtepu32_epi64(src: __m512i, k: __mmask8, a: __m256i) -> __m512i {
    unsafe {
        let convert: i64x8 = _mm512_cvtepu32_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
    }
}

/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu32_epi64&expand=1573)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxdq))]
pub fn _mm512_maskz_cvtepu32_epi64(k: __mmask8, a: __m256i) -> __m512i {
    unsafe {
        let convert: i64x8 = _mm512_cvtepu32_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
    }
}

/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu32_epi64&expand=1569)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxdq))]
pub fn _mm256_mask_cvtepu32_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let convert: i64x4 = _mm256_cvtepu32_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
    }
}

/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu32_epi64&expand=1570)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxdq))]
pub fn _mm256_maskz_cvtepu32_epi64(k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let convert: i64x4 = _mm256_cvtepu32_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
    }
}

/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu32_epi64&expand=1566)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxdq))]
pub fn _mm_mask_cvtepu32_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert: i64x2 = _mm_cvtepu32_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
    }
}

/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu32_epi64&expand=1567)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxdq))]
pub fn _mm_maskz_cvtepu32_epi64(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert: i64x2 = _mm_cvtepu32_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
    }
}

/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_ps&expand=1455)
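///
/// A minimal illustrative sketch (not compiled as a doctest; assumes an
/// AVX-512F-capable CPU): each signed lane becomes the equivalent `f32`.
///
/// ```ignore
/// let a = _mm512_set1_epi32(-42);
/// let r = _mm512_cvtepi32_ps(a);
/// // r == _mm512_set1_ps(-42.0)
/// ```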
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtdq2ps))]
pub fn _mm512_cvtepi32_ps(a: __m512i) -> __m512 {
    unsafe {
        let a: i32x16 = a.as_i32x16();
        transmute::<f32x16, _>(simd_cast(a))
    }
}

/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_ps&expand=1456)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtdq2ps))]
pub fn _mm512_mask_cvtepi32_ps(src: __m512, k: __mmask16, a: __m512i) -> __m512 {
    unsafe {
        let convert: f32x16 = _mm512_cvtepi32_ps(a).as_f32x16();
        transmute(simd_select_bitmask(k, convert, src.as_f32x16()))
    }
}

/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_ps&expand=1457)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtdq2ps))]
pub fn _mm512_maskz_cvtepi32_ps(k: __mmask16, a: __m512i) -> __m512 {
    unsafe {
        let convert: f32x16 = _mm512_cvtepi32_ps(a).as_f32x16();
        transmute(simd_select_bitmask(k, convert, f32x16::ZERO))
    }
}

/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_ps&expand=1453)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtdq2ps))]
pub fn _mm256_mask_cvtepi32_ps(src: __m256, k: __mmask8, a: __m256i) -> __m256 {
    unsafe {
        let convert: f32x8 = _mm256_cvtepi32_ps(a).as_f32x8();
        transmute(simd_select_bitmask(k, convert, src.as_f32x8()))
    }
}

/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_ps&expand=1454)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtdq2ps))]
pub fn _mm256_maskz_cvtepi32_ps(k: __mmask8, a: __m256i) -> __m256 {
    unsafe {
        let convert: f32x8 = _mm256_cvtepi32_ps(a).as_f32x8();
        transmute(simd_select_bitmask(k, convert, f32x8::ZERO))
    }
}

/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_ps&expand=1450)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtdq2ps))]
pub fn _mm_mask_cvtepi32_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 {
    unsafe {
        let convert: f32x4 = _mm_cvtepi32_ps(a).as_f32x4();
        transmute(simd_select_bitmask(k, convert, src.as_f32x4()))
    }
}

/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_ps&expand=1451)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtdq2ps))]
pub fn _mm_maskz_cvtepi32_ps(k: __mmask8, a: __m128i) -> __m128 {
    unsafe {
        let convert: f32x4 = _mm_cvtepi32_ps(a).as_f32x4();
        transmute(simd_select_bitmask(k, convert, f32x4::ZERO))
    }
}

/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_pd&expand=1446)
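///
/// A minimal illustrative sketch (not compiled as a doctest; assumes an
/// AVX-512F-capable CPU): eight 32-bit lanes widen to eight `f64` lanes.
///
/// ```ignore
/// let a = _mm256_set1_epi32(3);
/// let r = _mm512_cvtepi32_pd(a);
/// // r == _mm512_set1_pd(3.0)
/// ```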
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
pub fn _mm512_cvtepi32_pd(a: __m256i) -> __m512d {
    unsafe {
        let a: i32x8 = a.as_i32x8();
        transmute::<f64x8, _>(simd_cast(a))
    }
}

/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_pd&expand=1447)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
pub fn _mm512_mask_cvtepi32_pd(src: __m512d, k: __mmask8, a: __m256i) -> __m512d {
    unsafe {
        let convert: f64x8 = _mm512_cvtepi32_pd(a).as_f64x8();
        transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
    }
}

/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_pd&expand=1448)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
pub fn _mm512_maskz_cvtepi32_pd(k: __mmask8, a: __m256i) -> __m512d {
    unsafe {
        let convert: f64x8 = _mm512_cvtepi32_pd(a).as_f64x8();
        transmute(simd_select_bitmask(k, convert, f64x8::ZERO))
    }
}

/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_pd&expand=1444)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
pub fn _mm256_mask_cvtepi32_pd(src: __m256d, k: __mmask8, a: __m128i) -> __m256d {
    unsafe {
        let convert: f64x4 = _mm256_cvtepi32_pd(a).as_f64x4();
        transmute(simd_select_bitmask(k, convert, src.as_f64x4()))
    }
}

/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_pd&expand=1445)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
pub fn _mm256_maskz_cvtepi32_pd(k: __mmask8, a: __m128i) -> __m256d {
    unsafe {
        let convert: f64x4 = _mm256_cvtepi32_pd(a).as_f64x4();
        transmute(simd_select_bitmask(k, convert, f64x4::ZERO))
    }
}

/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_pd&expand=1441)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
pub fn _mm_mask_cvtepi32_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d {
    unsafe {
        let convert: f64x2 = _mm_cvtepi32_pd(a).as_f64x2();
        transmute(simd_select_bitmask(k, convert, src.as_f64x2()))
    }
}

/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_pd&expand=1442)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
pub fn _mm_maskz_cvtepi32_pd(k: __mmask8, a: __m128i) -> __m128d {
    unsafe {
        let convert: f64x2 = _mm_cvtepi32_pd(a).as_f64x2();
        transmute(simd_select_bitmask(k, convert, f64x2::ZERO))
    }
}

/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32_ps&expand=1583)
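///
/// A minimal illustrative sketch (not compiled as a doctest; assumes an
/// AVX-512F-capable CPU): lanes are interpreted as unsigned before conversion.
///
/// ```ignore
/// let a = _mm512_set1_epi32(-1); // every lane holds u32::MAX
/// let r = _mm512_cvtepu32_ps(a);
/// // each lane of r is about 4.29e9 (u32::MAX rounded to f32), not -1.0
/// ```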
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtudq2ps))]
pub fn _mm512_cvtepu32_ps(a: __m512i) -> __m512 {
    unsafe {
        let a: u32x16 = a.as_u32x16();
        transmute::<f32x16, _>(simd_cast(a))
    }
}

/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32_ps&expand=1584)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtudq2ps))]
pub fn _mm512_mask_cvtepu32_ps(src: __m512, k: __mmask16, a: __m512i) -> __m512 {
    unsafe {
        let convert: f32x16 = _mm512_cvtepu32_ps(a).as_f32x16();
        transmute(simd_select_bitmask(k, convert, src.as_f32x16()))
    }
}

/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu32_ps&expand=1585)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtudq2ps))]
pub fn _mm512_maskz_cvtepu32_ps(k: __mmask16, a: __m512i) -> __m512 {
    unsafe {
        let convert: f32x16 = _mm512_cvtepu32_ps(a).as_f32x16();
        transmute(simd_select_bitmask(k, convert, f32x16::ZERO))
    }
}

/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32_pd&expand=1580)
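///
/// A minimal illustrative sketch (not compiled as a doctest; assumes an
/// AVX-512F-capable CPU): `f64` can represent every `u32` exactly.
///
/// ```ignore
/// let a = _mm256_set1_epi32(-1); // every lane is 0xFFFF_FFFF
/// let r = _mm512_cvtepu32_pd(a);
/// // r == _mm512_set1_pd(4294967295.0)
/// ```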
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
pub fn _mm512_cvtepu32_pd(a: __m256i) -> __m512d {
    unsafe {
        let a: u32x8 = a.as_u32x8();
        transmute::<f64x8, _>(simd_cast(a))
    }
}

/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32_pd&expand=1581)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
pub fn _mm512_mask_cvtepu32_pd(src: __m512d, k: __mmask8, a: __m256i) -> __m512d {
    unsafe {
        let convert: f64x8 = _mm512_cvtepu32_pd(a).as_f64x8();
        transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
    }
}

/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu32_pd&expand=1582)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
pub fn _mm512_maskz_cvtepu32_pd(k: __mmask8, a: __m256i) -> __m512d {
    unsafe {
        let convert: f64x8 = _mm512_cvtepu32_pd(a).as_f64x8();
        transmute(simd_select_bitmask(k, convert, f64x8::ZERO))
    }
}

/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu32_pd&expand=1577)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
pub fn _mm256_cvtepu32_pd(a: __m128i) -> __m256d {
    unsafe {
        let a: u32x4 = a.as_u32x4();
        transmute::<f64x4, _>(simd_cast(a))
    }
}

/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu32_pd&expand=1578)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
pub fn _mm256_mask_cvtepu32_pd(src: __m256d, k: __mmask8, a: __m128i) -> __m256d {
    unsafe {
        let convert: f64x4 = _mm256_cvtepu32_pd(a).as_f64x4();
        transmute(simd_select_bitmask(k, convert, src.as_f64x4()))
    }
}

/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu32_pd&expand=1579)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
pub fn _mm256_maskz_cvtepu32_pd(k: __mmask8, a: __m128i) -> __m256d {
    unsafe {
        let convert: f64x4 = _mm256_cvtepu32_pd(a).as_f64x4();
        transmute(simd_select_bitmask(k, convert, f64x4::ZERO))
    }
}

/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu32_pd&expand=1574)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
pub fn _mm_cvtepu32_pd(a: __m128i) -> __m128d {
    unsafe {
        let a: u32x4 = a.as_u32x4();
        let u64: u32x2 = simd_shuffle!(a, a, [0, 1]);
        transmute::<f64x2, _>(simd_cast(u64))
    }
}

/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu32_pd&expand=1575)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
pub fn _mm_mask_cvtepu32_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d {
    unsafe {
        let convert: f64x2 = _mm_cvtepu32_pd(a).as_f64x2();
        transmute(simd_select_bitmask(k, convert, src.as_f64x2()))
    }
}

/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu32_pd&expand=1576)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
pub fn _mm_maskz_cvtepu32_pd(k: __mmask8, a: __m128i) -> __m128d {
    unsafe {
        let convert: f64x2 = _mm_cvtepu32_pd(a).as_f64x2();
        transmute(simd_select_bitmask(k, convert, f64x2::ZERO))
    }
}

/// Performs element-by-element conversion of the lower half of packed 32-bit integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32lo_pd&expand=1464)
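///
/// A minimal illustrative sketch (not compiled as a doctest; assumes an
/// AVX-512F-capable CPU): only the lower eight 32-bit elements of v2
/// participate in the conversion.
///
/// ```ignore
/// let v2 = _mm512_set1_epi32(-2);
/// let r = _mm512_cvtepi32lo_pd(v2);
/// // r == _mm512_set1_pd(-2.0)
/// ```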
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
pub fn _mm512_cvtepi32lo_pd(v2: __m512i) -> __m512d {
    unsafe {
        let v2: i32x16 = v2.as_i32x16();
        let v256: i32x8 = simd_shuffle!(v2, v2, [0, 1, 2, 3, 4, 5, 6, 7]);
        transmute::<f64x8, _>(simd_cast(v256))
    }
}

/// Performs element-by-element conversion of the lower half of packed 32-bit integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32lo_pd&expand=1465)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
pub fn _mm512_mask_cvtepi32lo_pd(src: __m512d, k: __mmask8, v2: __m512i) -> __m512d {
    unsafe {
        let convert: f64x8 = _mm512_cvtepi32lo_pd(v2).as_f64x8();
        transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
    }
}

/// Performs element-by-element conversion of the lower half of packed 32-bit unsigned integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32lo_pd&expand=1586)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
pub fn _mm512_cvtepu32lo_pd(v2: __m512i) -> __m512d {
    unsafe {
        let v2: u32x16 = v2.as_u32x16();
        let v256: u32x8 = simd_shuffle!(v2, v2, [0, 1, 2, 3, 4, 5, 6, 7]);
        transmute::<f64x8, _>(simd_cast(v256))
    }
}

/// Performs element-by-element conversion of the lower half of 32-bit unsigned integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32lo_pd&expand=1587)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
pub fn _mm512_mask_cvtepu32lo_pd(src: __m512d, k: __mmask8, v2: __m512i) -> __m512d {
    unsafe {
        let convert: f64x8 = _mm512_cvtepu32lo_pd(v2).as_f64x8();
        transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
    }
}

/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_epi16&expand=1419)
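///
/// A minimal illustrative sketch (not compiled as a doctest; assumes an
/// AVX-512F-capable CPU): truncation keeps only the low 16 bits of each lane.
///
/// ```ignore
/// let a = _mm512_set1_epi32(0x0001_0002);
/// let r = _mm512_cvtepi32_epi16(a);
/// // r == _mm256_set1_epi16(2)
/// ```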
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovdw))]
pub fn _mm512_cvtepi32_epi16(a: __m512i) -> __m256i {
    unsafe {
        let a: i32x16 = a.as_i32x16();
        transmute::<i16x16, _>(simd_cast(a))
    }
}

/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_epi16&expand=1420)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovdw))]
pub fn _mm512_mask_cvtepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i {
    unsafe {
        let convert: i16x16 = _mm512_cvtepi32_epi16(a).as_i16x16();
        transmute(simd_select_bitmask(k, convert, src.as_i16x16()))
    }
}

/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_epi16&expand=1421)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovdw))]
pub fn _mm512_maskz_cvtepi32_epi16(k: __mmask16, a: __m512i) -> __m256i {
    unsafe {
        let convert: i16x16 = _mm512_cvtepi32_epi16(a).as_i16x16();
        transmute(simd_select_bitmask(k, convert, i16x16::ZERO))
    }
}

/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi32_epi16&expand=1416)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovdw))]
pub fn _mm256_cvtepi32_epi16(a: __m256i) -> __m128i {
    unsafe {
        let a: i32x8 = a.as_i32x8();
        transmute::<i16x8, _>(simd_cast(a))
    }
}

/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_epi16&expand=1417)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovdw))]
pub fn _mm256_mask_cvtepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    unsafe {
        let convert: i16x8 = _mm256_cvtepi32_epi16(a).as_i16x8();
        transmute(simd_select_bitmask(k, convert, src.as_i16x8()))
    }
}

/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_epi16&expand=1418)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovdw))]
pub fn _mm256_maskz_cvtepi32_epi16(k: __mmask8, a: __m256i) -> __m128i {
    unsafe {
        let convert: i16x8 = _mm256_cvtepi32_epi16(a).as_i16x8();
        transmute(simd_select_bitmask(k, convert, i16x8::ZERO))
    }
}

/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi32_epi16&expand=1413)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovdw))]
pub fn _mm_cvtepi32_epi16(a: __m128i) -> __m128i {
    unsafe { transmute(vpmovdw128(a.as_i32x4(), i16x8::ZERO, 0b11111111)) }
}

/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_epi16&expand=1414)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovdw))]
pub fn _mm_mask_cvtepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovdw128(a.as_i32x4(), src.as_i16x8(), k)) }
}

/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_epi16&expand=1415)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovdw))]
pub fn _mm_maskz_cvtepi32_epi16(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovdw128(a.as_i32x4(), i16x8::ZERO, k)) }
}

/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_epi8&expand=1437)
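///
/// A minimal illustrative sketch (not compiled as a doctest; assumes an
/// AVX-512F-capable CPU): truncation keeps only the low 8 bits of each lane.
///
/// ```ignore
/// let a = _mm512_set1_epi32(0x1FF); // 511
/// let r = _mm512_cvtepi32_epi8(a);
/// // the low byte is 0xFF, i.e. -1 as i8: r == _mm_set1_epi8(-1)
/// ```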
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovdb))]
pub fn _mm512_cvtepi32_epi8(a: __m512i) -> __m128i {
    unsafe {
        let a: i32x16 = a.as_i32x16();
        transmute::<i8x16, _>(simd_cast(a))
    }
}

/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_epi8&expand=1438)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovdb))]
pub fn _mm512_mask_cvtepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i {
    unsafe {
        let convert: i8x16 = _mm512_cvtepi32_epi8(a).as_i8x16();
        transmute(simd_select_bitmask(k, convert, src.as_i8x16()))
    }
}

/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_epi8&expand=1439)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovdb))]
pub fn _mm512_maskz_cvtepi32_epi8(k: __mmask16, a: __m512i) -> __m128i {
    unsafe {
        let convert: i8x16 = _mm512_cvtepi32_epi8(a).as_i8x16();
        transmute(simd_select_bitmask(k, convert, i8x16::ZERO))
    }
}

/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi32_epi8&expand=1434)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovdb))]
pub fn _mm256_cvtepi32_epi8(a: __m256i) -> __m128i {
    unsafe { transmute(vpmovdb256(a.as_i32x8(), i8x16::ZERO, 0b11111111)) }
}

/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_epi8&expand=1435)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovdb))]
pub fn _mm256_mask_cvtepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovdb256(a.as_i32x8(), src.as_i8x16(), k)) }
}

/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_epi8&expand=1436)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovdb))]
pub fn _mm256_maskz_cvtepi32_epi8(k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovdb256(a.as_i32x8(), i8x16::ZERO, k)) }
}

/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi32_epi8&expand=1431)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovdb))]
pub fn _mm_cvtepi32_epi8(a: __m128i) -> __m128i {
    unsafe { transmute(vpmovdb128(a.as_i32x4(), i8x16::ZERO, 0b11111111)) }
}

/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_epi8&expand=1432)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovdb))]
pub fn _mm_mask_cvtepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovdb128(a.as_i32x4(), src.as_i8x16(), k)) }
}

/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_epi8&expand=1433)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovdb))]
pub fn _mm_maskz_cvtepi32_epi8(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovdb128(a.as_i32x4(), i8x16::ZERO, k)) }
}

/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_epi32&expand=1481)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovqd))]
pub fn _mm512_cvtepi64_epi32(a: __m512i) -> __m256i {
    unsafe {
        let a: i64x8 = a.as_i64x8();
        transmute::<i32x8, _>(simd_cast(a))
    }
}

/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_epi32&expand=1482)
#[inline]
13160#[target_feature(enable = "avx512f")]
13161#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13162#[cfg_attr(test, assert_instr(vpmovqd))]
13163pub fn _mm512_mask_cvtepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i {
13164 unsafe {
13165 let convert: i32x8 = _mm512_cvtepi64_epi32(a).as_i32x8();
13166 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i32x8()))
13167 }
13168}
13169
13170/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13171///
13172/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_epi32&expand=1483)
13173#[inline]
13174#[target_feature(enable = "avx512f")]
13175#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13176#[cfg_attr(test, assert_instr(vpmovqd))]
13177pub fn _mm512_maskz_cvtepi64_epi32(k: __mmask8, a: __m512i) -> __m256i {
13178 unsafe {
13179 let convert: i32x8 = _mm512_cvtepi64_epi32(a).as_i32x8();
13180 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i32x8::ZERO))
13181 }
13182}
13183
13184/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst.
13185///
13186/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_epi32&expand=1478)
13187#[inline]
13188#[target_feature(enable = "avx512f,avx512vl")]
13189#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13190#[cfg_attr(test, assert_instr(vpmovqd))]
13191pub fn _mm256_cvtepi64_epi32(a: __m256i) -> __m128i {
13192 unsafe {
13193 let a: i64x4 = a.as_i64x4();
13194 transmute::<i32x4, _>(src:simd_cast(a))
13195 }
13196}
13197
13198/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13199///
13200/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_epi32&expand=1479)
13201#[inline]
13202#[target_feature(enable = "avx512f,avx512vl")]
13203#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13204#[cfg_attr(test, assert_instr(vpmovqd))]
13205pub fn _mm256_mask_cvtepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13206 unsafe {
13207 let convert: i32x4 = _mm256_cvtepi64_epi32(a).as_i32x4();
13208 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i32x4()))
13209 }
13210}
13211
13212/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13213///
13214/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_epi32&expand=1480)
13215#[inline]
13216#[target_feature(enable = "avx512f,avx512vl")]
13217#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13218#[cfg_attr(test, assert_instr(vpmovqd))]
13219pub fn _mm256_maskz_cvtepi64_epi32(k: __mmask8, a: __m256i) -> __m128i {
13220 unsafe {
13221 let convert: i32x4 = _mm256_cvtepi64_epi32(a).as_i32x4();
13222 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i32x4::ZERO))
13223 }
13224}
13225
13226/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst.
13227///
13228/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_epi32&expand=1475)
13229#[inline]
13230#[target_feature(enable = "avx512f,avx512vl")]
13231#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13232#[cfg_attr(test, assert_instr(vpmovqd))]
13233pub fn _mm_cvtepi64_epi32(a: __m128i) -> __m128i {
13234 unsafe { transmute(src:vpmovqd128(a.as_i64x2(), src:i32x4::ZERO, mask:0b11111111)) }
13235}
13236
13237/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13238///
13239/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_epi32&expand=1476)
13240#[inline]
13241#[target_feature(enable = "avx512f,avx512vl")]
13242#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13243#[cfg_attr(test, assert_instr(vpmovqd))]
13244pub fn _mm_mask_cvtepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13245 unsafe { transmute(src:vpmovqd128(a.as_i64x2(), src.as_i32x4(), mask:k)) }
13246}
13247
13248/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13249///
13250/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_epi32&expand=1477)
13251#[inline]
13252#[target_feature(enable = "avx512f,avx512vl")]
13253#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13254#[cfg_attr(test, assert_instr(vpmovqd))]
13255pub fn _mm_maskz_cvtepi64_epi32(k: __mmask8, a: __m128i) -> __m128i {
13256 unsafe { transmute(src:vpmovqd128(a.as_i64x2(), src:i32x4::ZERO, mask:k)) }
13257}
13258
13259/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
13260///
13261/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_epi16&expand=1472)
13262#[inline]
13263#[target_feature(enable = "avx512f")]
13264#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13265#[cfg_attr(test, assert_instr(vpmovqw))]
13266pub fn _mm512_cvtepi64_epi16(a: __m512i) -> __m128i {
13267 unsafe {
13268 let a: i64x8 = a.as_i64x8();
13269 transmute::<i16x8, _>(src:simd_cast(a))
13270 }
13271}
13272
13273/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13274///
13275/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_epi16&expand=1473)
13276#[inline]
13277#[target_feature(enable = "avx512f")]
13278#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13279#[cfg_attr(test, assert_instr(vpmovqw))]
13280pub fn _mm512_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
13281 unsafe {
13282 let convert: i16x8 = _mm512_cvtepi64_epi16(a).as_i16x8();
13283 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i16x8()))
13284 }
13285}
13286
13287/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13288///
13289/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_epi16&expand=1474)
13290#[inline]
13291#[target_feature(enable = "avx512f")]
13292#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13293#[cfg_attr(test, assert_instr(vpmovqw))]
13294pub fn _mm512_maskz_cvtepi64_epi16(k: __mmask8, a: __m512i) -> __m128i {
13295 unsafe {
13296 let convert: i16x8 = _mm512_cvtepi64_epi16(a).as_i16x8();
13297 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i16x8::ZERO))
13298 }
13299}
13300
13301/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
13302///
13303/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_epi16&expand=1469)
13304#[inline]
13305#[target_feature(enable = "avx512f,avx512vl")]
13306#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13307#[cfg_attr(test, assert_instr(vpmovqw))]
13308pub fn _mm256_cvtepi64_epi16(a: __m256i) -> __m128i {
13309 unsafe { transmute(src:vpmovqw256(a.as_i64x4(), src:i16x8::ZERO, mask:0b11111111)) }
13310}
13311
13312/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13313///
13314/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_epi16&expand=1470)
13315#[inline]
13316#[target_feature(enable = "avx512f,avx512vl")]
13317#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13318#[cfg_attr(test, assert_instr(vpmovqw))]
13319pub fn _mm256_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13320 unsafe { transmute(src:vpmovqw256(a.as_i64x4(), src.as_i16x8(), mask:k)) }
13321}
13322
13323/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13324///
13325/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_epi16&expand=1471)
13326#[inline]
13327#[target_feature(enable = "avx512f,avx512vl")]
13328#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13329#[cfg_attr(test, assert_instr(vpmovqw))]
13330pub fn _mm256_maskz_cvtepi64_epi16(k: __mmask8, a: __m256i) -> __m128i {
13331 unsafe { transmute(src:vpmovqw256(a.as_i64x4(), src:i16x8::ZERO, mask:k)) }
13332}
13333
13334/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
13335///
13336/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_epi16&expand=1466)
13337#[inline]
13338#[target_feature(enable = "avx512f,avx512vl")]
13339#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13340#[cfg_attr(test, assert_instr(vpmovqw))]
13341pub fn _mm_cvtepi64_epi16(a: __m128i) -> __m128i {
13342 unsafe { transmute(src:vpmovqw128(a.as_i64x2(), src:i16x8::ZERO, mask:0b11111111)) }
13343}
13344
13345/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13346///
13347/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_epi16&expand=1467)
13348#[inline]
13349#[target_feature(enable = "avx512f,avx512vl")]
13350#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13351#[cfg_attr(test, assert_instr(vpmovqw))]
13352pub fn _mm_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13353 unsafe { transmute(src:vpmovqw128(a.as_i64x2(), src.as_i16x8(), mask:k)) }
13354}
13355
13356/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13357///
13358/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_epi16&expand=1468)
13359#[inline]
13360#[target_feature(enable = "avx512f,avx512vl")]
13361#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13362#[cfg_attr(test, assert_instr(vpmovqw))]
13363pub fn _mm_maskz_cvtepi64_epi16(k: __mmask8, a: __m128i) -> __m128i {
13364 unsafe { transmute(src:vpmovqw128(a.as_i64x2(), src:i16x8::ZERO, mask:k)) }
13365}
13366
13367/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
13368///
13369/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_epi8&expand=1490)
13370#[inline]
13371#[target_feature(enable = "avx512f")]
13372#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13373#[cfg_attr(test, assert_instr(vpmovqb))]
13374pub fn _mm512_cvtepi64_epi8(a: __m512i) -> __m128i {
13375 unsafe { transmute(src:vpmovqb(a.as_i64x8(), src:i8x16::ZERO, mask:0b11111111)) }
13376}
13377
13378/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13379///
13380/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_epi8&expand=1491)
13381#[inline]
13382#[target_feature(enable = "avx512f")]
13383#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13384#[cfg_attr(test, assert_instr(vpmovqb))]
13385pub fn _mm512_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
13386 unsafe { transmute(src:vpmovqb(a.as_i64x8(), src.as_i8x16(), mask:k)) }
13387}
13388
13389/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13390///
13391/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_epi8&expand=1492)
13392#[inline]
13393#[target_feature(enable = "avx512f")]
13394#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13395#[cfg_attr(test, assert_instr(vpmovqb))]
13396pub fn _mm512_maskz_cvtepi64_epi8(k: __mmask8, a: __m512i) -> __m128i {
13397 unsafe { transmute(src:vpmovqb(a.as_i64x8(), src:i8x16::ZERO, mask:k)) }
13398}
13399
13400/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
13401///
13402/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_epi8&expand=1487)
13403#[inline]
13404#[target_feature(enable = "avx512f,avx512vl")]
13405#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13406#[cfg_attr(test, assert_instr(vpmovqb))]
13407pub fn _mm256_cvtepi64_epi8(a: __m256i) -> __m128i {
13408 unsafe { transmute(src:vpmovqb256(a.as_i64x4(), src:i8x16::ZERO, mask:0b11111111)) }
13409}
13410
13411/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13412///
13413/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_epi8&expand=1488)
13414#[inline]
13415#[target_feature(enable = "avx512f,avx512vl")]
13416#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13417#[cfg_attr(test, assert_instr(vpmovqb))]
13418pub fn _mm256_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13419 unsafe { transmute(src:vpmovqb256(a.as_i64x4(), src.as_i8x16(), mask:k)) }
13420}
13421
13422/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13423///
13424/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_epi8&expand=1489)
13425#[inline]
13426#[target_feature(enable = "avx512f,avx512vl")]
13427#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13428#[cfg_attr(test, assert_instr(vpmovqb))]
13429pub fn _mm256_maskz_cvtepi64_epi8(k: __mmask8, a: __m256i) -> __m128i {
13430 unsafe { transmute(src:vpmovqb256(a.as_i64x4(), src:i8x16::ZERO, mask:k)) }
13431}
13432
13433/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
13434///
13435/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_epi8&expand=1484)
13436#[inline]
13437#[target_feature(enable = "avx512f,avx512vl")]
13438#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13439#[cfg_attr(test, assert_instr(vpmovqb))]
13440pub fn _mm_cvtepi64_epi8(a: __m128i) -> __m128i {
13441 unsafe { transmute(src:vpmovqb128(a.as_i64x2(), src:i8x16::ZERO, mask:0b11111111)) }
13442}
13443
13444/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13445///
13446/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_epi8&expand=1485)
13447#[inline]
13448#[target_feature(enable = "avx512f,avx512vl")]
13449#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13450#[cfg_attr(test, assert_instr(vpmovqb))]
13451pub fn _mm_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13452 unsafe { transmute(src:vpmovqb128(a.as_i64x2(), src.as_i8x16(), mask:k)) }
13453}
13454
13455/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13456///
13457/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_epi8&expand=1486)
13458#[inline]
13459#[target_feature(enable = "avx512f,avx512vl")]
13460#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13461#[cfg_attr(test, assert_instr(vpmovqb))]
13462pub fn _mm_maskz_cvtepi64_epi8(k: __mmask8, a: __m128i) -> __m128i {
13463 unsafe { transmute(src:vpmovqb128(a.as_i64x2(), src:i8x16::ZERO, mask:k)) }
13464}
13465
13466/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13467///
13468/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi32_epi16&expand=1819)
13469#[inline]
13470#[target_feature(enable = "avx512f")]
13471#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13472#[cfg_attr(test, assert_instr(vpmovsdw))]
13473pub fn _mm512_cvtsepi32_epi16(a: __m512i) -> __m256i {
13474 unsafe { transmute(src:vpmovsdw(a.as_i32x16(), src:i16x16::ZERO, mask:0b11111111_11111111)) }
13475}
13476
13477/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13478///
13479/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi32_epi16&expand=1820)
13480#[inline]
13481#[target_feature(enable = "avx512f")]
13482#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13483#[cfg_attr(test, assert_instr(vpmovsdw))]
13484pub fn _mm512_mask_cvtsepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i {
13485 unsafe { transmute(src:vpmovsdw(a.as_i32x16(), src.as_i16x16(), mask:k)) }
13486}
13487
13488/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13489///
13490/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi32_epi16&expand=1819)
13491#[inline]
13492#[target_feature(enable = "avx512f")]
13493#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13494#[cfg_attr(test, assert_instr(vpmovsdw))]
13495pub fn _mm512_maskz_cvtsepi32_epi16(k: __mmask16, a: __m512i) -> __m256i {
13496 unsafe { transmute(src:vpmovsdw(a.as_i32x16(), src:i16x16::ZERO, mask:k)) }
13497}
13498
13499/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13500///
13501/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi32_epi16&expand=1816)
13502#[inline]
13503#[target_feature(enable = "avx512f,avx512vl")]
13504#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13505#[cfg_attr(test, assert_instr(vpmovsdw))]
13506pub fn _mm256_cvtsepi32_epi16(a: __m256i) -> __m128i {
13507 unsafe { transmute(src:vpmovsdw256(a.as_i32x8(), src:i16x8::ZERO, mask:0b11111111)) }
13508}
13509
13510/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13511///
13512/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi32_epi16&expand=1817)
13513#[inline]
13514#[target_feature(enable = "avx512f,avx512vl")]
13515#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13516#[cfg_attr(test, assert_instr(vpmovsdw))]
13517pub fn _mm256_mask_cvtsepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13518 unsafe { transmute(src:vpmovsdw256(a.as_i32x8(), src.as_i16x8(), mask:k)) }
13519}
13520
13521/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13522///
13523/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi32_epi16&expand=1818)
13524#[inline]
13525#[target_feature(enable = "avx512f,avx512vl")]
13526#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13527#[cfg_attr(test, assert_instr(vpmovsdw))]
13528pub fn _mm256_maskz_cvtsepi32_epi16(k: __mmask8, a: __m256i) -> __m128i {
13529 unsafe { transmute(src:vpmovsdw256(a.as_i32x8(), src:i16x8::ZERO, mask:k)) }
13530}
13531
13532/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13533///
13534/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi32_epi16&expand=1813)
13535#[inline]
13536#[target_feature(enable = "avx512f,avx512vl")]
13537#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13538#[cfg_attr(test, assert_instr(vpmovsdw))]
13539pub fn _mm_cvtsepi32_epi16(a: __m128i) -> __m128i {
13540 unsafe { transmute(src:vpmovsdw128(a.as_i32x4(), src:i16x8::ZERO, mask:0b11111111)) }
13541}
13542
13543/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13544///
13545/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi32_epi16&expand=1814)
13546#[inline]
13547#[target_feature(enable = "avx512f,avx512vl")]
13548#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13549#[cfg_attr(test, assert_instr(vpmovsdw))]
13550pub fn _mm_mask_cvtsepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13551 unsafe { transmute(src:vpmovsdw128(a.as_i32x4(), src.as_i16x8(), mask:k)) }
13552}
13553
13554/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13555///
13556/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi32_epi16&expand=1815)
13557#[inline]
13558#[target_feature(enable = "avx512f,avx512vl")]
13559#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13560#[cfg_attr(test, assert_instr(vpmovsdw))]
13561pub fn _mm_maskz_cvtsepi32_epi16(k: __mmask8, a: __m128i) -> __m128i {
13562 unsafe { transmute(src:vpmovsdw128(a.as_i32x4(), src:i16x8::ZERO, mask:k)) }
13563}
13564
13565/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
13566///
13567/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi32_epi8&expand=1828)
13568#[inline]
13569#[target_feature(enable = "avx512f")]
13570#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13571#[cfg_attr(test, assert_instr(vpmovsdb))]
13572pub fn _mm512_cvtsepi32_epi8(a: __m512i) -> __m128i {
13573 unsafe { transmute(src:vpmovsdb(a.as_i32x16(), src:i8x16::ZERO, mask:0b11111111_11111111)) }
13574}
13575
13576/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13577///
13578/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi32_epi8&expand=1829)
13579#[inline]
13580#[target_feature(enable = "avx512f")]
13581#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13582#[cfg_attr(test, assert_instr(vpmovsdb))]
13583pub fn _mm512_mask_cvtsepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i {
13584 unsafe { transmute(src:vpmovsdb(a.as_i32x16(), src.as_i8x16(), mask:k)) }
13585}
13586
13587/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13588///
13589/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi32_epi8&expand=1830)
13590#[inline]
13591#[target_feature(enable = "avx512f")]
13592#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13593#[cfg_attr(test, assert_instr(vpmovsdb))]
13594pub fn _mm512_maskz_cvtsepi32_epi8(k: __mmask16, a: __m512i) -> __m128i {
13595 unsafe { transmute(src:vpmovsdb(a.as_i32x16(), src:i8x16::ZERO, mask:k)) }
13596}
13597
13598/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
13599///
13600/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi32_epi8&expand=1825)
13601#[inline]
13602#[target_feature(enable = "avx512f,avx512vl")]
13603#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13604#[cfg_attr(test, assert_instr(vpmovsdb))]
13605pub fn _mm256_cvtsepi32_epi8(a: __m256i) -> __m128i {
13606 unsafe { transmute(src:vpmovsdb256(a.as_i32x8(), src:i8x16::ZERO, mask:0b11111111)) }
13607}
13608
13609/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13610///
13611/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi32_epi8&expand=1826)
13612#[inline]
13613#[target_feature(enable = "avx512f,avx512vl")]
13614#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13615#[cfg_attr(test, assert_instr(vpmovsdb))]
13616pub fn _mm256_mask_cvtsepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13617 unsafe { transmute(src:vpmovsdb256(a.as_i32x8(), src.as_i8x16(), mask:k)) }
13618}
13619
13620/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13621///
13622/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi32_epi8&expand=1827)
13623#[inline]
13624#[target_feature(enable = "avx512f,avx512vl")]
13625#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13626#[cfg_attr(test, assert_instr(vpmovsdb))]
13627pub fn _mm256_maskz_cvtsepi32_epi8(k: __mmask8, a: __m256i) -> __m128i {
13628 unsafe { transmute(src:vpmovsdb256(a.as_i32x8(), src:i8x16::ZERO, mask:k)) }
13629}
13630
13631/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
13632///
13633/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi32_epi8&expand=1822)
13634#[inline]
13635#[target_feature(enable = "avx512f,avx512vl")]
13636#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13637#[cfg_attr(test, assert_instr(vpmovsdb))]
13638pub fn _mm_cvtsepi32_epi8(a: __m128i) -> __m128i {
13639 unsafe { transmute(src:vpmovsdb128(a.as_i32x4(), src:i8x16::ZERO, mask:0b11111111)) }
13640}
13641
13642/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13643///
13644/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi32_epi8&expand=1823)
13645#[inline]
13646#[target_feature(enable = "avx512f,avx512vl")]
13647#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13648#[cfg_attr(test, assert_instr(vpmovsdb))]
13649pub fn _mm_mask_cvtsepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13650 unsafe { transmute(src:vpmovsdb128(a.as_i32x4(), src.as_i8x16(), mask:k)) }
13651}
13652
13653/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13654///
13655/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi32_epi8&expand=1824)
13656#[inline]
13657#[target_feature(enable = "avx512f,avx512vl")]
13658#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13659#[cfg_attr(test, assert_instr(vpmovsdb))]
13660pub fn _mm_maskz_cvtsepi32_epi8(k: __mmask8, a: __m128i) -> __m128i {
13661 unsafe { transmute(src:vpmovsdb128(a.as_i32x4(), src:i8x16::ZERO, mask:k)) }
13662}
13663
13664/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst.
13665///
13666/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi64_epi32&expand=1852)
13667#[inline]
13668#[target_feature(enable = "avx512f")]
13669#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13670#[cfg_attr(test, assert_instr(vpmovsqd))]
13671pub fn _mm512_cvtsepi64_epi32(a: __m512i) -> __m256i {
13672 unsafe { transmute(src:vpmovsqd(a.as_i64x8(), src:i32x8::ZERO, mask:0b11111111)) }
13673}
13674
13675/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13676///
13677/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_epi32&expand=1853)
13678#[inline]
13679#[target_feature(enable = "avx512f")]
13680#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13681#[cfg_attr(test, assert_instr(vpmovsqd))]
13682pub fn _mm512_mask_cvtsepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i {
13683 unsafe { transmute(src:vpmovsqd(a.as_i64x8(), src.as_i32x8(), mask:k)) }
13684}
13685
13686/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13687///
13688/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi64_epi32&expand=1854)
13689#[inline]
13690#[target_feature(enable = "avx512f")]
13691#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13692#[cfg_attr(test, assert_instr(vpmovsqd))]
13693pub fn _mm512_maskz_cvtsepi64_epi32(k: __mmask8, a: __m512i) -> __m256i {
13694 unsafe { transmute(src:vpmovsqd(a.as_i64x8(), src:i32x8::ZERO, mask:k)) }
13695}
13696
13697/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst.
13698///
13699/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi64_epi32&expand=1849)
13700#[inline]
13701#[target_feature(enable = "avx512f,avx512vl")]
13702#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13703#[cfg_attr(test, assert_instr(vpmovsqd))]
13704pub fn _mm256_cvtsepi64_epi32(a: __m256i) -> __m128i {
13705 unsafe { transmute(src:vpmovsqd256(a.as_i64x4(), src:i32x4::ZERO, mask:0b11111111)) }
13706}
13707
13708/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13709///
13710/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_epi32&expand=1850)
13711#[inline]
13712#[target_feature(enable = "avx512f,avx512vl")]
13713#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13714#[cfg_attr(test, assert_instr(vpmovsqd))]
13715pub fn _mm256_mask_cvtsepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13716 unsafe { transmute(src:vpmovsqd256(a.as_i64x4(), src.as_i32x4(), mask:k)) }
13717}
13718
13719/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13720///
13721/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi64_epi32&expand=1851)
13722#[inline]
13723#[target_feature(enable = "avx512f,avx512vl")]
13724#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13725#[cfg_attr(test, assert_instr(vpmovsqd))]
13726pub fn _mm256_maskz_cvtsepi64_epi32(k: __mmask8, a: __m256i) -> __m128i {
13727 unsafe { transmute(src:vpmovsqd256(a.as_i64x4(), src:i32x4::ZERO, mask:k)) }
13728}
13729
13730/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst.
13731///
13732/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi64_epi32&expand=1846)
13733#[inline]
13734#[target_feature(enable = "avx512f,avx512vl")]
13735#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13736#[cfg_attr(test, assert_instr(vpmovsqd))]
13737pub fn _mm_cvtsepi64_epi32(a: __m128i) -> __m128i {
13738 unsafe { transmute(src:vpmovsqd128(a.as_i64x2(), src:i32x4::ZERO, mask:0b11111111)) }
13739}
13740
13741/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13742///
13743/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_epi32&expand=1847)
13744#[inline]
13745#[target_feature(enable = "avx512f,avx512vl")]
13746#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13747#[cfg_attr(test, assert_instr(vpmovsqd))]
13748pub fn _mm_mask_cvtsepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13749 unsafe { transmute(src:vpmovsqd128(a.as_i64x2(), src.as_i32x4(), mask:k)) }
13750}
13751
13752/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13753///
13754/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi64_epi32&expand=1848)
13755#[inline]
13756#[target_feature(enable = "avx512f,avx512vl")]
13757#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13758#[cfg_attr(test, assert_instr(vpmovsqd))]
13759pub fn _mm_maskz_cvtsepi64_epi32(k: __mmask8, a: __m128i) -> __m128i {
13760 unsafe { transmute(src:vpmovsqd128(a.as_i64x2(), src:i32x4::ZERO, mask:k)) }
13761}
13762
13763/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13764///
13765/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi64_epi16&expand=1843)
13766#[inline]
13767#[target_feature(enable = "avx512f")]
13768#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13769#[cfg_attr(test, assert_instr(vpmovsqw))]
13770pub fn _mm512_cvtsepi64_epi16(a: __m512i) -> __m128i {
13771 unsafe { transmute(src:vpmovsqw(a.as_i64x8(), src:i16x8::ZERO, mask:0b11111111)) }
13772}
13773
13774/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13775///
13776/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_epi16&expand=1844)
13777#[inline]
13778#[target_feature(enable = "avx512f")]
13779#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13780#[cfg_attr(test, assert_instr(vpmovsqw))]
13781pub fn _mm512_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
13782 unsafe { transmute(src:vpmovsqw(a.as_i64x8(), src.as_i16x8(), mask:k)) }
13783}
13784
13785/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13786///
13787/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi64_epi16&expand=1845)
13788#[inline]
13789#[target_feature(enable = "avx512f")]
13790#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13791#[cfg_attr(test, assert_instr(vpmovsqw))]
13792pub fn _mm512_maskz_cvtsepi64_epi16(k: __mmask8, a: __m512i) -> __m128i {
13793 unsafe { transmute(src:vpmovsqw(a.as_i64x8(), src:i16x8::ZERO, mask:k)) }
13794}
13795
13796/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13797///
13798/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi64_epi16&expand=1840)
13799#[inline]
13800#[target_feature(enable = "avx512f,avx512vl")]
13801#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13802#[cfg_attr(test, assert_instr(vpmovsqw))]
13803pub fn _mm256_cvtsepi64_epi16(a: __m256i) -> __m128i {
13804 unsafe { transmute(src:vpmovsqw256(a.as_i64x4(), src:i16x8::ZERO, mask:0b11111111)) }
13805}
13806
13807/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13808///
13809/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_epi16&expand=1841)
13810#[inline]
13811#[target_feature(enable = "avx512f,avx512vl")]
13812#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13813#[cfg_attr(test, assert_instr(vpmovsqw))]
13814pub fn _mm256_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13815 unsafe { transmute(src:vpmovsqw256(a.as_i64x4(), src.as_i16x8(), mask:k)) }
13816}
13817
13818/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13819///
13820/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi64_epi16&expand=1842)
13821#[inline]
13822#[target_feature(enable = "avx512f,avx512vl")]
13823#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13824#[cfg_attr(test, assert_instr(vpmovsqw))]
13825pub fn _mm256_maskz_cvtsepi64_epi16(k: __mmask8, a: __m256i) -> __m128i {
13826 unsafe { transmute(src:vpmovsqw256(a.as_i64x4(), src:i16x8::ZERO, mask:k)) }
13827}
13828
13829/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13830///
13831/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi64_epi16&expand=1837)
13832#[inline]
13833#[target_feature(enable = "avx512f,avx512vl")]
13834#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13835#[cfg_attr(test, assert_instr(vpmovsqw))]
13836pub fn _mm_cvtsepi64_epi16(a: __m128i) -> __m128i {
13837 unsafe { transmute(src:vpmovsqw128(a.as_i64x2(), src:i16x8::ZERO, mask:0b11111111)) }
13838}
13839
13840/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13841///
13842/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_epi16&expand=1838)
13843#[inline]
13844#[target_feature(enable = "avx512f,avx512vl")]
13845#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13846#[cfg_attr(test, assert_instr(vpmovsqw))]
13847pub fn _mm_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13848 unsafe { transmute(src:vpmovsqw128(a.as_i64x2(), src.as_i16x8(), mask:k)) }
13849}
13850
13851/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13852///
13853/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi64_epi16&expand=1839)
13854#[inline]
13855#[target_feature(enable = "avx512f,avx512vl")]
13856#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13857#[cfg_attr(test, assert_instr(vpmovsqw))]
13858pub fn _mm_maskz_cvtsepi64_epi16(k: __mmask8, a: __m128i) -> __m128i {
13859 unsafe { transmute(src:vpmovsqw128(a.as_i64x2(), src:i16x8::ZERO, mask:k)) }
13860}
13861
13862/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
13863///
13864/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi64_epi8&expand=1861)
13865#[inline]
13866#[target_feature(enable = "avx512f")]
13867#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13868#[cfg_attr(test, assert_instr(vpmovsqb))]
13869pub fn _mm512_cvtsepi64_epi8(a: __m512i) -> __m128i {
13870 unsafe { transmute(src:vpmovsqb(a.as_i64x8(), src:i8x16::ZERO, mask:0b11111111)) }
13871}
13872
13873/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13874///
13875/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_epi8&expand=1862)
13876#[inline]
13877#[target_feature(enable = "avx512f")]
13878#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13879#[cfg_attr(test, assert_instr(vpmovsqb))]
13880pub fn _mm512_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
13881 unsafe { transmute(src:vpmovsqb(a.as_i64x8(), src.as_i8x16(), mask:k)) }
13882}
13883
13884/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13885///
13886/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi64_epi8&expand=1863)
13887#[inline]
13888#[target_feature(enable = "avx512f")]
13889#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13890#[cfg_attr(test, assert_instr(vpmovsqb))]
13891pub fn _mm512_maskz_cvtsepi64_epi8(k: __mmask8, a: __m512i) -> __m128i {
13892 unsafe { transmute(src:vpmovsqb(a.as_i64x8(), src:i8x16::ZERO, mask:k)) }
13893}
13894
13895/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
13896///
13897/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi64_epi8&expand=1858)
13898#[inline]
13899#[target_feature(enable = "avx512f,avx512vl")]
13900#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13901#[cfg_attr(test, assert_instr(vpmovsqb))]
13902pub fn _mm256_cvtsepi64_epi8(a: __m256i) -> __m128i {
13903 unsafe { transmute(src:vpmovsqb256(a.as_i64x4(), src:i8x16::ZERO, mask:0b11111111)) }
13904}
13905
13906/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13907///
13908/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_epi8&expand=1859)
13909#[inline]
13910#[target_feature(enable = "avx512f,avx512vl")]
13911#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13912#[cfg_attr(test, assert_instr(vpmovsqb))]
13913pub fn _mm256_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13914 unsafe { transmute(src:vpmovsqb256(a.as_i64x4(), src.as_i8x16(), mask:k)) }
13915}
13916
13917/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13918///
13919/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi64_epi8&expand=1860)
13920#[inline]
13921#[target_feature(enable = "avx512f,avx512vl")]
13922#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13923#[cfg_attr(test, assert_instr(vpmovsqb))]
13924pub fn _mm256_maskz_cvtsepi64_epi8(k: __mmask8, a: __m256i) -> __m128i {
13925 unsafe { transmute(src:vpmovsqb256(a.as_i64x4(), src:i8x16::ZERO, mask:k)) }
13926}
13927
13928/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
13929///
13930/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi64_epi8&expand=1855)
13931#[inline]
13932#[target_feature(enable = "avx512f,avx512vl")]
13933#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13934#[cfg_attr(test, assert_instr(vpmovsqb))]
13935pub fn _mm_cvtsepi64_epi8(a: __m128i) -> __m128i {
13936 unsafe { transmute(src:vpmovsqb128(a.as_i64x2(), src:i8x16::ZERO, mask:0b11111111)) }
13937}
13938
13939/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13940///
13941/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_epi8&expand=1856)
13942#[inline]
13943#[target_feature(enable = "avx512f,avx512vl")]
13944#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13945#[cfg_attr(test, assert_instr(vpmovsqb))]
13946pub fn _mm_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13947 unsafe { transmute(src:vpmovsqb128(a.as_i64x2(), src.as_i8x16(), mask:k)) }
13948}
13949
13950/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13951///
13952/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi64_epi8&expand=1857)
13953#[inline]
13954#[target_feature(enable = "avx512f,avx512vl")]
13955#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13956#[cfg_attr(test, assert_instr(vpmovsqb))]
13957pub fn _mm_maskz_cvtsepi64_epi8(k: __mmask8, a: __m128i) -> __m128i {
13958 unsafe { transmute(src:vpmovsqb128(a.as_i64x2(), src:i8x16::ZERO, mask:k)) }
13959}
13960
13961/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
13962///
13963/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi32_epi16&expand=2054)
13964#[inline]
13965#[target_feature(enable = "avx512f")]
13966#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13967#[cfg_attr(test, assert_instr(vpmovusdw))]
13968pub fn _mm512_cvtusepi32_epi16(a: __m512i) -> __m256i {
13969 unsafe { transmute(src:vpmovusdw(a.as_u32x16(), src:u16x16::ZERO, mask:0b11111111_11111111)) }
13970}
13971
13972/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13973///
13974/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi32_epi16&expand=2055)
13975#[inline]
13976#[target_feature(enable = "avx512f")]
13977#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13978#[cfg_attr(test, assert_instr(vpmovusdw))]
13979pub fn _mm512_mask_cvtusepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i {
13980 unsafe { transmute(src:vpmovusdw(a.as_u32x16(), src.as_u16x16(), mask:k)) }
13981}
13982
13983/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13984///
13985/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi32_epi16&expand=2056)
13986#[inline]
13987#[target_feature(enable = "avx512f")]
13988#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13989#[cfg_attr(test, assert_instr(vpmovusdw))]
13990pub fn _mm512_maskz_cvtusepi32_epi16(k: __mmask16, a: __m512i) -> __m256i {
13991 unsafe { transmute(src:vpmovusdw(a.as_u32x16(), src:u16x16::ZERO, mask:k)) }
13992}
13993
13994/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
13995///
13996/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi32_epi16&expand=2051)
13997#[inline]
13998#[target_feature(enable = "avx512f,avx512vl")]
13999#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14000#[cfg_attr(test, assert_instr(vpmovusdw))]
14001pub fn _mm256_cvtusepi32_epi16(a: __m256i) -> __m128i {
14002 unsafe { transmute(src:vpmovusdw256(a.as_u32x8(), src:u16x8::ZERO, mask:0b11111111)) }
14003}
14004
14005/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14006///
14007/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi32_epi16&expand=2052)
14008#[inline]
14009#[target_feature(enable = "avx512f,avx512vl")]
14010#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14011#[cfg_attr(test, assert_instr(vpmovusdw))]
14012pub fn _mm256_mask_cvtusepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14013 unsafe { transmute(src:vpmovusdw256(a.as_u32x8(), src.as_u16x8(), mask:k)) }
14014}
14015
14016/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14017///
14018/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi32_epi16&expand=2053)
14019#[inline]
14020#[target_feature(enable = "avx512f,avx512vl")]
14021#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14022#[cfg_attr(test, assert_instr(vpmovusdw))]
14023pub fn _mm256_maskz_cvtusepi32_epi16(k: __mmask8, a: __m256i) -> __m128i {
14024 unsafe { transmute(vpmovusdw256(a.as_u32x8(), u16x8::ZERO, k)) }
14025}
14026
14027/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
14028///
14029/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi32_epi16&expand=2048)
14030#[inline]
14031#[target_feature(enable = "avx512f,avx512vl")]
14032#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14033#[cfg_attr(test, assert_instr(vpmovusdw))]
14034pub fn _mm_cvtusepi32_epi16(a: __m128i) -> __m128i {
14035 unsafe { transmute(vpmovusdw128(a.as_u32x4(), u16x8::ZERO, 0b11111111)) }
14036}
14037
14038/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14039///
14040/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi32_epi16&expand=2049)
14041#[inline]
14042#[target_feature(enable = "avx512f,avx512vl")]
14043#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14044#[cfg_attr(test, assert_instr(vpmovusdw))]
14045pub fn _mm_mask_cvtusepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14046 unsafe { transmute(vpmovusdw128(a.as_u32x4(), src.as_u16x8(), k)) }
14047}
14048
14049/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14050///
14051/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi32_epi16&expand=2050)
14052#[inline]
14053#[target_feature(enable = "avx512f,avx512vl")]
14054#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14055#[cfg_attr(test, assert_instr(vpmovusdw))]
14056pub fn _mm_maskz_cvtusepi32_epi16(k: __mmask8, a: __m128i) -> __m128i {
14057 unsafe { transmute(vpmovusdw128(a.as_u32x4(), u16x8::ZERO, k)) }
14058}
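
// Illustrative usage sketch, not part of the crate's test suite; the helper
// name `_usage_sketch_cvtusepi32_epi16` is hypothetical. It shows the unsigned
// saturating narrowing done by `_mm512_cvtusepi32_epi16` and how the writemask
// variant copies lanes from `src` where the corresponding mask bit is clear.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _usage_sketch_cvtusepi32_epi16() {
    // 70_000 exceeds u16::MAX, so every lane saturates to 0xFFFF.
    let a = _mm512_set1_epi32(70_000);
    let r = _mm512_cvtusepi32_epi16(a);
    let lanes: [u16; 16] = unsafe { mem::transmute(r) };
    assert!(lanes.iter().all(|&x| x == u16::MAX));

    // With a writemask, only lanes 0..8 are converted; lanes 8..16 keep `src`.
    let src = _mm256_set1_epi16(7);
    let masked = _mm512_mask_cvtusepi32_epi16(src, 0b00000000_11111111, a);
    let lanes: [u16; 16] = unsafe { mem::transmute(masked) };
    assert!(lanes[..8].iter().all(|&x| x == u16::MAX));
    assert!(lanes[8..].iter().all(|&x| x == 7));
}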
14059
14060/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14061///
14062/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi32_epi8&expand=2063)
14063#[inline]
14064#[target_feature(enable = "avx512f")]
14065#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14066#[cfg_attr(test, assert_instr(vpmovusdb))]
14067pub fn _mm512_cvtusepi32_epi8(a: __m512i) -> __m128i {
14068 unsafe { transmute(vpmovusdb(a.as_u32x16(), u8x16::ZERO, 0b11111111_11111111)) }
14069}
14070
14071/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14072///
14073/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi32_epi8&expand=2064)
14074#[inline]
14075#[target_feature(enable = "avx512f")]
14076#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14077#[cfg_attr(test, assert_instr(vpmovusdb))]
14078pub fn _mm512_mask_cvtusepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i {
14079 unsafe { transmute(vpmovusdb(a.as_u32x16(), src.as_u8x16(), k)) }
14080}
14081
14082/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14083///
14084/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi32_epi8&expand=2065)
14085#[inline]
14086#[target_feature(enable = "avx512f")]
14087#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14088#[cfg_attr(test, assert_instr(vpmovusdb))]
14089pub fn _mm512_maskz_cvtusepi32_epi8(k: __mmask16, a: __m512i) -> __m128i {
14090 unsafe { transmute(vpmovusdb(a.as_u32x16(), u8x16::ZERO, k)) }
14091}
14092
14093/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14094///
14095/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi32_epi8&expand=2060)
14096#[inline]
14097#[target_feature(enable = "avx512f,avx512vl")]
14098#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14099#[cfg_attr(test, assert_instr(vpmovusdb))]
14100pub fn _mm256_cvtusepi32_epi8(a: __m256i) -> __m128i {
14101 unsafe { transmute(vpmovusdb256(a.as_u32x8(), u8x16::ZERO, 0b11111111)) }
14102}
14103
14104/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14105///
14106/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi32_epi8&expand=2061)
14107#[inline]
14108#[target_feature(enable = "avx512f,avx512vl")]
14109#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14110#[cfg_attr(test, assert_instr(vpmovusdb))]
14111pub fn _mm256_mask_cvtusepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14112 unsafe { transmute(vpmovusdb256(a.as_u32x8(), src.as_u8x16(), k)) }
14113}
14114
14115/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14116///
14117/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi32_epi8&expand=2062)
14118#[inline]
14119#[target_feature(enable = "avx512f,avx512vl")]
14120#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14121#[cfg_attr(test, assert_instr(vpmovusdb))]
14122pub fn _mm256_maskz_cvtusepi32_epi8(k: __mmask8, a: __m256i) -> __m128i {
14123 unsafe { transmute(vpmovusdb256(a.as_u32x8(), u8x16::ZERO, k)) }
14124}
14125
14126/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14127///
14128/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi32_epi8&expand=2057)
14129#[inline]
14130#[target_feature(enable = "avx512f,avx512vl")]
14131#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14132#[cfg_attr(test, assert_instr(vpmovusdb))]
14133pub fn _mm_cvtusepi32_epi8(a: __m128i) -> __m128i {
14134 unsafe { transmute(vpmovusdb128(a.as_u32x4(), u8x16::ZERO, 0b11111111)) }
14135}
14136
14137/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14138///
14139/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi32_epi8&expand=2058)
14140#[inline]
14141#[target_feature(enable = "avx512f,avx512vl")]
14142#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14143#[cfg_attr(test, assert_instr(vpmovusdb))]
14144pub fn _mm_mask_cvtusepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14145 unsafe { transmute(vpmovusdb128(a.as_u32x4(), src.as_u8x16(), k)) }
14146}
14147
14148/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14149///
14150/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi32_epi8&expand=2059)
14151#[inline]
14152#[target_feature(enable = "avx512f,avx512vl")]
14153#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14154#[cfg_attr(test, assert_instr(vpmovusdb))]
14155pub fn _mm_maskz_cvtusepi32_epi8(k: __mmask8, a: __m128i) -> __m128i {
14156 unsafe { transmute(vpmovusdb128(a.as_u32x4(), u8x16::ZERO, k)) }
14157}
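
// Illustrative sketch, not part of the test suite; `_usage_sketch_cvtusepi32_epi8`
// is a hypothetical helper. The zeromask variant saturates each unsigned 32-bit
// lane to the u8 range and zeroes lanes whose mask bit is clear.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _usage_sketch_cvtusepi32_epi8() {
    let a = _mm512_set1_epi32(300); // above u8::MAX, saturates to 255
    let r = _mm512_maskz_cvtusepi32_epi8(0b11110000_00001111, a);
    let lanes: [u8; 16] = unsafe { mem::transmute(r) };
    let expected = [
        255, 255, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0, 255, 255, 255, 255,
    ];
    assert_eq!(lanes, expected);
}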
14158
14159/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst.
14160///
14161/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi64_epi32&expand=2087)
14162#[inline]
14163#[target_feature(enable = "avx512f")]
14164#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14165#[cfg_attr(test, assert_instr(vpmovusqd))]
14166pub fn _mm512_cvtusepi64_epi32(a: __m512i) -> __m256i {
14167 unsafe { transmute(vpmovusqd(a.as_u64x8(), u32x8::ZERO, 0b11111111)) }
14168}
14169
14170/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14171///
14172/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_epi32&expand=2088)
14173#[inline]
14174#[target_feature(enable = "avx512f")]
14175#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14176#[cfg_attr(test, assert_instr(vpmovusqd))]
14177pub fn _mm512_mask_cvtusepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i {
14178 unsafe { transmute(vpmovusqd(a.as_u64x8(), src.as_u32x8(), k)) }
14179}
14180
14181/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14182///
14183/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi64_epi32&expand=2089)
14184#[inline]
14185#[target_feature(enable = "avx512f")]
14186#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14187#[cfg_attr(test, assert_instr(vpmovusqd))]
14188pub fn _mm512_maskz_cvtusepi64_epi32(k: __mmask8, a: __m512i) -> __m256i {
14189 unsafe { transmute(vpmovusqd(a.as_u64x8(), u32x8::ZERO, k)) }
14190}
14191
14192/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst.
14193///
14194/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi64_epi32&expand=2084)
14195#[inline]
14196#[target_feature(enable = "avx512f,avx512vl")]
14197#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14198#[cfg_attr(test, assert_instr(vpmovusqd))]
14199pub fn _mm256_cvtusepi64_epi32(a: __m256i) -> __m128i {
14200 unsafe { transmute(vpmovusqd256(a.as_u64x4(), u32x4::ZERO, 0b11111111)) }
14201}
14202
14203/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14204///
14205/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_epi32&expand=2085)
14206#[inline]
14207#[target_feature(enable = "avx512f,avx512vl")]
14208#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14209#[cfg_attr(test, assert_instr(vpmovusqd))]
14210pub fn _mm256_mask_cvtusepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14211 unsafe { transmute(vpmovusqd256(a.as_u64x4(), src.as_u32x4(), k)) }
14212}
14213
14214/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14215///
14216/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi64_epi32&expand=2086)
14217#[inline]
14218#[target_feature(enable = "avx512f,avx512vl")]
14219#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14220#[cfg_attr(test, assert_instr(vpmovusqd))]
14221pub fn _mm256_maskz_cvtusepi64_epi32(k: __mmask8, a: __m256i) -> __m128i {
14222 unsafe { transmute(vpmovusqd256(a.as_u64x4(), u32x4::ZERO, k)) }
14223}
14224
14225/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst.
14226///
14227/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi64_epi32&expand=2081)
14228#[inline]
14229#[target_feature(enable = "avx512f,avx512vl")]
14230#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14231#[cfg_attr(test, assert_instr(vpmovusqd))]
14232pub fn _mm_cvtusepi64_epi32(a: __m128i) -> __m128i {
14233 unsafe { transmute(vpmovusqd128(a.as_u64x2(), u32x4::ZERO, 0b11111111)) }
14234}
14235
14236/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14237///
14238/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_epi32&expand=2082)
14239#[inline]
14240#[target_feature(enable = "avx512f,avx512vl")]
14241#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14242#[cfg_attr(test, assert_instr(vpmovusqd))]
14243pub fn _mm_mask_cvtusepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14244 unsafe { transmute(vpmovusqd128(a.as_u64x2(), src.as_u32x4(), k)) }
14245}
14246
14247/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14248///
14249/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi64_epi32&expand=2083)
14250#[inline]
14251#[target_feature(enable = "avx512f,avx512vl")]
14252#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14253#[cfg_attr(test, assert_instr(vpmovusqd))]
14254pub fn _mm_maskz_cvtusepi64_epi32(k: __mmask8, a: __m128i) -> __m128i {
14255 unsafe { transmute(vpmovusqd128(a.as_u64x2(), u32x4::ZERO, k)) }
14256}
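
// Illustrative sketch (hypothetical helper, not part of the test suite):
// `_mm512_cvtusepi64_epi32` packs eight unsigned 64-bit lanes into eight
// unsigned 32-bit lanes, clamping anything above u32::MAX.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _usage_sketch_cvtusepi64_epi32() {
    let a = _mm512_set1_epi64(u32::MAX as i64 + 1); // 2^32 is out of u32 range
    let r = _mm512_cvtusepi64_epi32(a);
    let lanes: [u32; 8] = unsafe { mem::transmute(r) };
    assert_eq!(lanes, [u32::MAX; 8]);
}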
14257
14258/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
14259///
14260/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi64_epi16&expand=2078)
14261#[inline]
14262#[target_feature(enable = "avx512f")]
14263#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14264#[cfg_attr(test, assert_instr(vpmovusqw))]
14265pub fn _mm512_cvtusepi64_epi16(a: __m512i) -> __m128i {
14266 unsafe { transmute(vpmovusqw(a.as_u64x8(), u16x8::ZERO, 0b11111111)) }
14267}
14268
14269/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14270///
14271/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_epi16&expand=2079)
14272#[inline]
14273#[target_feature(enable = "avx512f")]
14274#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14275#[cfg_attr(test, assert_instr(vpmovusqw))]
14276pub fn _mm512_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
14277 unsafe { transmute(vpmovusqw(a.as_u64x8(), src.as_u16x8(), k)) }
14278}
14279
14280/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14281///
14282/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi64_epi16&expand=2080)
14283#[inline]
14284#[target_feature(enable = "avx512f")]
14285#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14286#[cfg_attr(test, assert_instr(vpmovusqw))]
14287pub fn _mm512_maskz_cvtusepi64_epi16(k: __mmask8, a: __m512i) -> __m128i {
14288 unsafe { transmute(vpmovusqw(a.as_u64x8(), u16x8::ZERO, k)) }
14289}
14290
14291/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
14292///
14293/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi64_epi16&expand=2075)
14294#[inline]
14295#[target_feature(enable = "avx512f,avx512vl")]
14296#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14297#[cfg_attr(test, assert_instr(vpmovusqw))]
14298pub fn _mm256_cvtusepi64_epi16(a: __m256i) -> __m128i {
14299 unsafe { transmute(vpmovusqw256(a.as_u64x4(), u16x8::ZERO, 0b11111111)) }
14300}
14301
14302/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14303///
14304/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_epi16&expand=2076)
14305#[inline]
14306#[target_feature(enable = "avx512f,avx512vl")]
14307#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14308#[cfg_attr(test, assert_instr(vpmovusqw))]
14309pub fn _mm256_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14310 unsafe { transmute(vpmovusqw256(a.as_u64x4(), src.as_u16x8(), k)) }
14311}
14312
14313/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14314///
14315/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi64_epi16&expand=2077)
14316#[inline]
14317#[target_feature(enable = "avx512f,avx512vl")]
14318#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14319#[cfg_attr(test, assert_instr(vpmovusqw))]
14320pub fn _mm256_maskz_cvtusepi64_epi16(k: __mmask8, a: __m256i) -> __m128i {
14321 unsafe { transmute(vpmovusqw256(a.as_u64x4(), u16x8::ZERO, k)) }
14322}
14323
14324/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
14325///
14326/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi64_epi16&expand=2072)
14327#[inline]
14328#[target_feature(enable = "avx512f,avx512vl")]
14329#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14330#[cfg_attr(test, assert_instr(vpmovusqw))]
14331pub fn _mm_cvtusepi64_epi16(a: __m128i) -> __m128i {
14332 unsafe { transmute(vpmovusqw128(a.as_u64x2(), u16x8::ZERO, 0b11111111)) }
14333}
14334
14335/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14336///
14337/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_epi16&expand=2073)
14338#[inline]
14339#[target_feature(enable = "avx512f,avx512vl")]
14340#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14341#[cfg_attr(test, assert_instr(vpmovusqw))]
14342pub fn _mm_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14343 unsafe { transmute(vpmovusqw128(a.as_u64x2(), src.as_u16x8(), k)) }
14344}
14345
14346/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14347///
14348/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi64_epi16&expand=2074)
14349#[inline]
14350#[target_feature(enable = "avx512f,avx512vl")]
14351#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14352#[cfg_attr(test, assert_instr(vpmovusqw))]
14353pub fn _mm_maskz_cvtusepi64_epi16(k: __mmask8, a: __m128i) -> __m128i {
14354 unsafe { transmute(vpmovusqw128(a.as_u64x2(), u16x8::ZERO, k)) }
14355}
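
// Illustrative sketch (hypothetical helper, not part of the test suite; needs
// AVX512VL in addition to AVX512F): the 128-bit `_mm_cvtusepi64_epi16` writes
// its two saturated 16-bit results into the low lanes of the destination and
// zeroes the remaining lanes.
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
fn _usage_sketch_cvtusepi64_epi16() {
    let a = _mm_set1_epi64x(100_000); // above u16::MAX, saturates to 0xFFFF
    let r = _mm_cvtusepi64_epi16(a);
    let lanes: [u16; 8] = unsafe { mem::transmute(r) };
    assert_eq!(lanes, [0xFFFF, 0xFFFF, 0, 0, 0, 0, 0, 0]);
}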
14356
14357/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14358///
14359/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi64_epi8&expand=2096)
14360#[inline]
14361#[target_feature(enable = "avx512f")]
14362#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14363#[cfg_attr(test, assert_instr(vpmovusqb))]
14364pub fn _mm512_cvtusepi64_epi8(a: __m512i) -> __m128i {
14365 unsafe { transmute(vpmovusqb(a.as_u64x8(), u8x16::ZERO, 0b11111111)) }
14366}
14367
14368/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14369///
14370/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_epi8&expand=2097)
14371#[inline]
14372#[target_feature(enable = "avx512f")]
14373#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14374#[cfg_attr(test, assert_instr(vpmovusqb))]
14375pub fn _mm512_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
14376 unsafe { transmute(vpmovusqb(a.as_u64x8(), src.as_u8x16(), k)) }
14377}
14378
14379/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14380///
14381/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi64_epi8&expand=2098)
14382#[inline]
14383#[target_feature(enable = "avx512f")]
14384#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14385#[cfg_attr(test, assert_instr(vpmovusqb))]
14386pub fn _mm512_maskz_cvtusepi64_epi8(k: __mmask8, a: __m512i) -> __m128i {
14387 unsafe { transmute(vpmovusqb(a.as_u64x8(), u8x16::ZERO, k)) }
14388}
14389
14390/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14391///
14392/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi64_epi8&expand=2093)
14393#[inline]
14394#[target_feature(enable = "avx512f,avx512vl")]
14395#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14396#[cfg_attr(test, assert_instr(vpmovusqb))]
14397pub fn _mm256_cvtusepi64_epi8(a: __m256i) -> __m128i {
14398 unsafe { transmute(vpmovusqb256(a.as_u64x4(), u8x16::ZERO, 0b11111111)) }
14399}
14400
14401/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14402///
14403/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_epi8&expand=2094)
14404#[inline]
14405#[target_feature(enable = "avx512f,avx512vl")]
14406#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14407#[cfg_attr(test, assert_instr(vpmovusqb))]
14408pub fn _mm256_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14409 unsafe { transmute(vpmovusqb256(a.as_u64x4(), src.as_u8x16(), k)) }
14410}
14411
14412/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14413///
14414/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi64_epi8&expand=2095)
14415#[inline]
14416#[target_feature(enable = "avx512f,avx512vl")]
14417#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14418#[cfg_attr(test, assert_instr(vpmovusqb))]
14419pub fn _mm256_maskz_cvtusepi64_epi8(k: __mmask8, a: __m256i) -> __m128i {
14420 unsafe { transmute(vpmovusqb256(a.as_u64x4(), u8x16::ZERO, k)) }
14421}
14422
14423/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14424///
14425/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi64_epi8&expand=2090)
14426#[inline]
14427#[target_feature(enable = "avx512f,avx512vl")]
14428#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14429#[cfg_attr(test, assert_instr(vpmovusqb))]
14430pub fn _mm_cvtusepi64_epi8(a: __m128i) -> __m128i {
14431 unsafe { transmute(vpmovusqb128(a.as_u64x2(), u8x16::ZERO, 0b11111111)) }
14432}
14433
14434/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14435///
14436/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_epi8&expand=2091)
14437#[inline]
14438#[target_feature(enable = "avx512f,avx512vl")]
14439#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14440#[cfg_attr(test, assert_instr(vpmovusqb))]
14441pub fn _mm_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14442 unsafe { transmute(vpmovusqb128(a.as_u64x2(), src.as_u8x16(), k)) }
14443}
14444
14445/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14446///
14447/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi64_epi8&expand=2092)
14448#[inline]
14449#[target_feature(enable = "avx512f,avx512vl")]
14450#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14451#[cfg_attr(test, assert_instr(vpmovusqb))]
14452pub fn _mm_maskz_cvtusepi64_epi8(k: __mmask8, a: __m128i) -> __m128i {
14453 unsafe { transmute(vpmovusqb128(a.as_u64x2(), u8x16::ZERO, k)) }
14454}
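
// Illustrative sketch (hypothetical helper, not part of the test suite; needs
// AVX512VL): with a writemask, `_mm256_mask_cvtusepi64_epi8` saturates the four
// 64-bit lanes to u8 where the mask bit is set, copies the corresponding byte
// of `src` where it is clear, and zeroes byte lanes 4..16 of the destination.
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
fn _usage_sketch_cvtusepi64_epi8() {
    let a = _mm256_set1_epi64x(300); // above u8::MAX, saturates to 255
    let src = _mm_set1_epi8(9);
    let r = _mm256_mask_cvtusepi64_epi8(src, 0b0000_0101, a);
    let lanes: [u8; 16] = unsafe { mem::transmute(r) };
    assert_eq!(lanes, [255, 9, 255, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
}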
14455
14456/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.
14457///
14458/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
14459/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14460/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14461/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14462/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14463/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14464///
14465/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_epi32&expand=1335)
14466#[inline]
14467#[target_feature(enable = "avx512f")]
14468#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14469#[cfg_attr(test, assert_instr(vcvtps2dq, ROUNDING = 8))]
14470#[rustc_legacy_const_generics(1)]
14471pub fn _mm512_cvt_roundps_epi32<const ROUNDING: i32>(a: __m512) -> __m512i {
14472 unsafe {
14473 static_assert_rounding!(ROUNDING);
14474 let a: f32x16 = a.as_f32x16();
14475 let r: i32x16 = vcvtps2dq(a, i32x16::ZERO, 0b11111111_11111111, ROUNDING);
14476 transmute(r)
14477 }
14478}
14479
14480/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14481///
14482/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14483/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14484/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14485/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14486/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14487/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14488///
14489/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_epi32&expand=1336)
14490#[inline]
14491#[target_feature(enable = "avx512f")]
14492#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14493#[cfg_attr(test, assert_instr(vcvtps2dq, ROUNDING = 8))]
14494#[rustc_legacy_const_generics(3)]
14495pub fn _mm512_mask_cvt_roundps_epi32<const ROUNDING: i32>(
14496 src: __m512i,
14497 k: __mmask16,
14498 a: __m512,
14499) -> __m512i {
14500 unsafe {
14501 static_assert_rounding!(ROUNDING);
14502 let a: f32x16 = a.as_f32x16();
14503 let src: i32x16 = src.as_i32x16();
14504 let r: i32x16 = vcvtps2dq(a, src, k, ROUNDING);
14505 transmute(r)
14506 }
14507}
14508
14509/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14510///
14511/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14512/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14513/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14514/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14515/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14516/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14517///
14518/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_epi32&expand=1337)
14519#[inline]
14520#[target_feature(enable = "avx512f")]
14521#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14522#[cfg_attr(test, assert_instr(vcvtps2dq, ROUNDING = 8))]
14523#[rustc_legacy_const_generics(2)]
14524pub fn _mm512_maskz_cvt_roundps_epi32<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m512i {
14525 unsafe {
14526 static_assert_rounding!(ROUNDING);
14527 let a: f32x16 = a.as_f32x16();
14528 let r: i32x16 = vcvtps2dq(a, i32x16::ZERO, k, ROUNDING);
14529 transmute(r)
14530 }
14531}
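
// Illustrative sketch (hypothetical helper, not part of the test suite): the
// rounding mode of `_mm512_cvt_roundps_epi32` is a const generic, so different
// modes can be chosen at compile time; 2.5 goes to 2 under round-to-nearest-even
// and to 3 under round-toward-positive-infinity.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _usage_sketch_cvt_roundps_epi32() {
    let a = _mm512_set1_ps(2.5);
    let nearest =
        _mm512_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
    let up = _mm512_cvt_roundps_epi32::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a);
    let nearest: [i32; 16] = unsafe { mem::transmute(nearest) };
    let up: [i32; 16] = unsafe { mem::transmute(up) };
    assert_eq!(nearest, [2; 16]);
    assert_eq!(up, [3; 16]);
}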
14532
14533/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.\
14534///
14535/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14536/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14537/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14538/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14539/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14540/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14541///
14542/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_epu32&expand=1341)
14543#[inline]
14544#[target_feature(enable = "avx512f")]
14545#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14546#[cfg_attr(test, assert_instr(vcvtps2udq, ROUNDING = 8))]
14547#[rustc_legacy_const_generics(1)]
14548pub fn _mm512_cvt_roundps_epu32<const ROUNDING: i32>(a: __m512) -> __m512i {
14549 unsafe {
14550 static_assert_rounding!(ROUNDING);
14551 let a: f32x16 = a.as_f32x16();
14552 let r: u32x16 = vcvtps2udq(a, u32x16::ZERO, 0b11111111_11111111, ROUNDING);
14553 transmute(r)
14554 }
14555}
14556
14557/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14558///
14559/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14560/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14561/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14562/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14563/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14564/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14565///
14566/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_epu32&expand=1342)
14567#[inline]
14568#[target_feature(enable = "avx512f")]
14569#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14570#[cfg_attr(test, assert_instr(vcvtps2udq, ROUNDING = 8))]
14571#[rustc_legacy_const_generics(3)]
14572pub fn _mm512_mask_cvt_roundps_epu32<const ROUNDING: i32>(
14573 src: __m512i,
14574 k: __mmask16,
14575 a: __m512,
14576) -> __m512i {
14577 unsafe {
14578 static_assert_rounding!(ROUNDING);
14579 let a: f32x16 = a.as_f32x16();
14580 let src: u32x16 = src.as_u32x16();
14581 let r: u32x16 = vcvtps2udq(a, src, k, ROUNDING);
14582 transmute(r)
14583 }
14584}
14585
14586/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14587///
14588/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14589/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14590/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14591/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14592/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14593/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14594///
14595/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_epu32&expand=1343)
14596#[inline]
14597#[target_feature(enable = "avx512f")]
14598#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14599#[cfg_attr(test, assert_instr(vcvtps2udq, ROUNDING = 8))]
14600#[rustc_legacy_const_generics(2)]
14601pub fn _mm512_maskz_cvt_roundps_epu32<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m512i {
14602 unsafe {
14603 static_assert_rounding!(ROUNDING);
14604 let a: f32x16 = a.as_f32x16();
14605 let r: u32x16 = vcvtps2udq(a, u32x16::ZERO, k, ROUNDING);
14606 transmute(r)
14607 }
14608}
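
// Illustrative sketch (hypothetical helper, not part of the test suite):
// `_mm512_cvt_roundps_epu32` with `_MM_FROUND_TO_ZERO` truncates toward zero
// before converting to unsigned 32-bit integers, so 3.9 becomes 3.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _usage_sketch_cvt_roundps_epu32() {
    let a = _mm512_set1_ps(3.9);
    let r = _mm512_cvt_roundps_epu32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
    let lanes: [u32; 16] = unsafe { mem::transmute(r) };
    assert_eq!(lanes, [3; 16]);
}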
14609
14610/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.\
14611/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
14612///
14613/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_pd&expand=1347)
14614#[inline]
14615#[target_feature(enable = "avx512f")]
14616#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14617#[cfg_attr(test, assert_instr(vcvtps2pd, SAE = 8))]
14618#[rustc_legacy_const_generics(1)]
14619pub fn _mm512_cvt_roundps_pd<const SAE: i32>(a: __m256) -> __m512d {
14620 unsafe {
14621 static_assert_sae!(SAE);
14622 let a: f32x8 = a.as_f32x8();
14623 let r: f64x8 = vcvtps2pd(a, f64x8::ZERO, 0b11111111, SAE);
14624 transmute(r)
14625 }
14626}
14627
14628/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14629/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
14630///
14631/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_pd&expand=1348)
14632#[inline]
14633#[target_feature(enable = "avx512f")]
14634#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14635#[cfg_attr(test, assert_instr(vcvtps2pd, SAE = 8))]
14636#[rustc_legacy_const_generics(3)]
14637pub fn _mm512_mask_cvt_roundps_pd<const SAE: i32>(src: __m512d, k: __mmask8, a: __m256) -> __m512d {
14638 unsafe {
14639 static_assert_sae!(SAE);
14640 let a: f32x8 = a.as_f32x8();
14641 let src: f64x8 = src.as_f64x8();
14642 let r: f64x8 = vcvtps2pd(a, src, k, SAE);
14643 transmute(r)
14644 }
14645}
14646
14647/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14648/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
14649///
14650/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_pd&expand=1349)
14651#[inline]
14652#[target_feature(enable = "avx512f")]
14653#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14654#[cfg_attr(test, assert_instr(vcvtps2pd, SAE = 8))]
14655#[rustc_legacy_const_generics(2)]
14656pub fn _mm512_maskz_cvt_roundps_pd<const SAE: i32>(k: __mmask8, a: __m256) -> __m512d {
14657 unsafe {
14658 static_assert_sae!(SAE);
14659 let a: f32x8 = a.as_f32x8();
14660 let r: f64x8 = vcvtps2pd(a, f64x8::ZERO, k, SAE);
14661 transmute(r)
14662 }
14663}
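
// Illustrative sketch (hypothetical helper, not part of the test suite):
// widening f32 -> f64 is always exact, which is why `_mm512_cvt_roundps_pd`
// takes only an SAE (suppress-all-exceptions) parameter rather than a full
// rounding mode.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _usage_sketch_cvt_roundps_pd() {
    let a = _mm256_set1_ps(1.5);
    let r = _mm512_cvt_roundps_pd::<{ _MM_FROUND_NO_EXC }>(a);
    let lanes: [f64; 8] = unsafe { mem::transmute(r) };
    assert_eq!(lanes, [1.5f64; 8]);
}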
14664
14665/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.\
14666///
14667/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14668/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14669/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14670/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14671/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14672/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14673///
14674/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_epi32&expand=1315)
14675#[inline]
14676#[target_feature(enable = "avx512f")]
14677#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14678#[cfg_attr(test, assert_instr(vcvtpd2dq, ROUNDING = 8))]
14679#[rustc_legacy_const_generics(1)]
14680pub fn _mm512_cvt_roundpd_epi32<const ROUNDING: i32>(a: __m512d) -> __m256i {
14681 unsafe {
14682 static_assert_rounding!(ROUNDING);
14683 let a: f64x8 = a.as_f64x8();
14684 let r: i32x8 = vcvtpd2dq(a, i32x8::ZERO, 0b11111111, ROUNDING);
14685 transmute(r)
14686 }
14687}
14688
14689/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14690///
14691/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14692/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14693/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14694/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14695/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14696/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14697///
14698/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_epi32&expand=1316)
14699#[inline]
14700#[target_feature(enable = "avx512f")]
14701#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14702#[cfg_attr(test, assert_instr(vcvtpd2dq, ROUNDING = 8))]
14703#[rustc_legacy_const_generics(3)]
14704pub fn _mm512_mask_cvt_roundpd_epi32<const ROUNDING: i32>(
14705 src: __m256i,
14706 k: __mmask8,
14707 a: __m512d,
14708) -> __m256i {
14709 unsafe {
14710 static_assert_rounding!(ROUNDING);
14711 let a: f64x8 = a.as_f64x8();
14712 let src: i32x8 = src.as_i32x8();
14713 let r: i32x8 = vcvtpd2dq(a, src, k, ROUNDING);
14714 transmute(r)
14715 }
14716}
14717
14718/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14719///
14720/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14721/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14722/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14723/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14724/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14725/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14726///
14727/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundpd_epi32&expand=1317)
14728#[inline]
14729#[target_feature(enable = "avx512f")]
14730#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14731#[cfg_attr(test, assert_instr(vcvtpd2dq, ROUNDING = 8))]
14732#[rustc_legacy_const_generics(2)]
14733pub fn _mm512_maskz_cvt_roundpd_epi32<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m256i {
14734 unsafe {
14735 static_assert_rounding!(ROUNDING);
14736 let a: f64x8 = a.as_f64x8();
14737 let r: i32x8 = vcvtpd2dq(a, i32x8::ZERO, k, ROUNDING);
14738 transmute(r)
14739 }
14740}
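
// Illustrative sketch (hypothetical helper, not part of the test suite): the
// writemask variant of `_mm512_cvt_roundpd_epi32` keeps lanes of `src` where
// the mask bit is clear; -1.5 rounds to -2 under round-toward-negative-infinity.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _usage_sketch_cvt_roundpd_epi32() {
    let a = _mm512_set1_pd(-1.5);
    let src = _mm256_set1_epi32(42);
    let r = _mm512_mask_cvt_roundpd_epi32::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(
        src, 0b0000_1111, a,
    );
    let lanes: [i32; 8] = unsafe { mem::transmute(r) };
    assert_eq!(lanes, [-2, -2, -2, -2, 42, 42, 42, 42]);
}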
14741
14742/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.\
14743///
14744/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14745/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14746/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14747/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14748/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14749/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14750///
14751/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_epu32&expand=1321)
14752#[inline]
14753#[target_feature(enable = "avx512f")]
14754#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14755#[cfg_attr(test, assert_instr(vcvtpd2udq, ROUNDING = 8))]
14756#[rustc_legacy_const_generics(1)]
14757pub fn _mm512_cvt_roundpd_epu32<const ROUNDING: i32>(a: __m512d) -> __m256i {
14758 unsafe {
14759 static_assert_rounding!(ROUNDING);
14760 let a: f64x8 = a.as_f64x8();
14761 let r: u32x8 = vcvtpd2udq(a, u32x8::ZERO, 0b11111111, ROUNDING);
14762 transmute(r)
14763 }
14764}
14765
14766/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14767///
14768/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14769/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14770/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14771/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14772/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14773/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14774///
14775/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_epu32&expand=1322)
14776#[inline]
14777#[target_feature(enable = "avx512f")]
14778#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14779#[cfg_attr(test, assert_instr(vcvtpd2udq, ROUNDING = 8))]
14780#[rustc_legacy_const_generics(3)]
14781pub fn _mm512_mask_cvt_roundpd_epu32<const ROUNDING: i32>(
14782 src: __m256i,
14783 k: __mmask8,
14784 a: __m512d,
14785) -> __m256i {
14786 unsafe {
14787 static_assert_rounding!(ROUNDING);
14788 let a: f64x8 = a.as_f64x8();
14789 let src: u32x8 = src.as_u32x8();
14790 let r: u32x8 = vcvtpd2udq(a, src, k, ROUNDING);
14791 transmute(r)
14792 }
14793}
14794
14795/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14796///
14797/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14798/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14799/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14800/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14801/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14802/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14803///
14804/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundpd_epu32&expand=1323)
14805#[inline]
14806#[target_feature(enable = "avx512f")]
14807#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14808#[cfg_attr(test, assert_instr(vcvtpd2udq, ROUNDING = 8))]
14809#[rustc_legacy_const_generics(2)]
14810pub fn _mm512_maskz_cvt_roundpd_epu32<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m256i {
14811 unsafe {
14812 static_assert_rounding!(ROUNDING);
14813 let a: f64x8 = a.as_f64x8();
14814 let r: u32x8 = vcvtpd2udq(a, u32x8::ZERO, k, ROUNDING);
14815 transmute(r)
14816 }
14817}
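
// Illustrative sketch (hypothetical helper, not part of the test suite): the
// zeromask variant of `_mm512_cvt_roundpd_epu32` zeroes lanes whose mask bit
// is clear; 2.5 rounds to 2 under round-to-nearest-even.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _usage_sketch_cvt_roundpd_epu32() {
    let a = _mm512_set1_pd(2.5);
    let r = _mm512_maskz_cvt_roundpd_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
        0b1111_0000, a,
    );
    let lanes: [u32; 8] = unsafe { mem::transmute(r) };
    assert_eq!(lanes, [0, 0, 0, 0, 2, 2, 2, 2]);
}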
14818
14819/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
14820///
14821/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14822/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14823/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14824/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14825/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14826/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14827///
14828/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_ps&expand=1327)
14829#[inline]
14830#[target_feature(enable = "avx512f")]
14831#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14832#[cfg_attr(test, assert_instr(vcvtpd2ps, ROUNDING = 8))]
14833#[rustc_legacy_const_generics(1)]
14834pub fn _mm512_cvt_roundpd_ps<const ROUNDING: i32>(a: __m512d) -> __m256 {
14835 unsafe {
14836 static_assert_rounding!(ROUNDING);
14837 let a: f64x8 = a.as_f64x8();
14838 let r: f32x8 = vcvtpd2ps(a, f32x8::ZERO, 0b11111111, ROUNDING);
14839 transmute(r)
14840 }
14841}
14842
14843/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14844///
14845/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14846/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14847/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14848/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14849/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14850/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14851///
14852/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_ps&expand=1328)
14853#[inline]
14854#[target_feature(enable = "avx512f")]
14855#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14856#[cfg_attr(test, assert_instr(vcvtpd2ps, ROUNDING = 8))]
14857#[rustc_legacy_const_generics(3)]
14858pub fn _mm512_mask_cvt_roundpd_ps<const ROUNDING: i32>(
14859 src: __m256,
14860 k: __mmask8,
14861 a: __m512d,
14862) -> __m256 {
14863 unsafe {
14864 static_assert_rounding!(ROUNDING);
14865 let a: f64x8 = a.as_f64x8();
14866 let src: f32x8 = src.as_f32x8();
14867 let r: f32x8 = vcvtpd2ps(a, src, k, ROUNDING);
14868 transmute(r)
14869 }
14870}
14871
14872/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14873///
14874/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14875/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14876/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14877/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14878/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14879/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14880///
14881/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundpd_ps&expand=1329)
14882#[inline]
14883#[target_feature(enable = "avx512f")]
14884#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14885#[cfg_attr(test, assert_instr(vcvtpd2ps, ROUNDING = 8))]
14886#[rustc_legacy_const_generics(2)]
14887pub fn _mm512_maskz_cvt_roundpd_ps<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m256 {
14888 unsafe {
14889 static_assert_rounding!(ROUNDING);
14890 let a: f64x8 = a.as_f64x8();
14891 let r: f32x8 = vcvtpd2ps(a, f32x8::ZERO, k, ROUNDING);
14892 transmute(r)
14893 }
14894}
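
// Illustrative sketch (hypothetical helper, not part of the test suite):
// narrowing f64 -> f32 with `_mm512_cvt_roundpd_ps` is where the rounding mode
// becomes observable; 1.0 + 2^-52 has no exact f32 representation, so it rounds
// down to 1.0 under round-to-nearest but up to the next f32 under
// round-toward-positive-infinity.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _usage_sketch_cvt_roundpd_ps() {
    let a = _mm512_set1_pd(1.0 + f64::EPSILON);
    let nearest = _mm512_cvt_roundpd_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
    let up = _mm512_cvt_roundpd_ps::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a);
    let nearest: [f32; 8] = unsafe { mem::transmute(nearest) };
    let up: [f32; 8] = unsafe { mem::transmute(up) };
    assert_eq!(nearest[0], 1.0);
    assert!(up[0] > 1.0);
}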
14895
14896/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
14897///
14898/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14899/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14900/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14901/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14902/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14903/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14904///
14905/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepi32_ps&expand=1294)
14906#[inline]
14907#[target_feature(enable = "avx512f")]
14908#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14909#[cfg_attr(test, assert_instr(vcvtdq2ps, ROUNDING = 8))]
14910#[rustc_legacy_const_generics(1)]
14911pub fn _mm512_cvt_roundepi32_ps<const ROUNDING: i32>(a: __m512i) -> __m512 {
14912 unsafe {
14913 static_assert_rounding!(ROUNDING);
14914 let a: i32x16 = a.as_i32x16();
14915 let r: f32x16 = vcvtdq2ps(a, ROUNDING);
14916 transmute(r)
14917 }
14918}
14919
14920/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14921///
14922/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14923/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14924/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14925/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14926/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14927/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14928///
14929/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepi32_ps&expand=1295)
14930#[inline]
14931#[target_feature(enable = "avx512f")]
14932#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14933#[cfg_attr(test, assert_instr(vcvtdq2ps, ROUNDING = 8))]
14934#[rustc_legacy_const_generics(3)]
14935pub fn _mm512_mask_cvt_roundepi32_ps<const ROUNDING: i32>(
14936 src: __m512,
14937 k: __mmask16,
14938 a: __m512i,
14939) -> __m512 {
14940 unsafe {
14941 static_assert_rounding!(ROUNDING);
14942 let a: i32x16 = a.as_i32x16();
14943 let r: f32x16 = vcvtdq2ps(a, ROUNDING);
14944 transmute(simd_select_bitmask(k, r, src.as_f32x16()))
14945 }
14946}
14947
14948/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14949///
14950/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14951/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14952/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14953/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14954/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14955/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14956///
14957/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepi32_ps&expand=1296)
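///
/// A short sketch contrasting zero-masking with merge-masking (hypothetical helper;
/// assumes nightly `stdarch_x86_avx512` and AVX-512F):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn convert_low_eight(src: __m512, a: __m512i) -> (__m512, __m512) {
///     const RC: i32 = _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
///     let k: __mmask16 = 0b00000000_11111111;
///     // Illustrative only: merge-masking keeps the upper eight lanes of `src`,
///     // while zero-masking sets those lanes to 0.0 instead.
///     let merged = _mm512_mask_cvt_roundepi32_ps::<RC>(src, k, a);
///     let zeroed = _mm512_maskz_cvt_roundepi32_ps::<RC>(k, a);
///     (merged, zeroed)
/// }
/// ```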
14958#[inline]
14959#[target_feature(enable = "avx512f")]
14960#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14961#[cfg_attr(test, assert_instr(vcvtdq2ps, ROUNDING = 8))]
14962#[rustc_legacy_const_generics(2)]
14963pub fn _mm512_maskz_cvt_roundepi32_ps<const ROUNDING: i32>(k: __mmask16, a: __m512i) -> __m512 {
14964 unsafe {
14965 static_assert_rounding!(ROUNDING);
14966 let a: i32x16 = a.as_i32x16();
14967 let r: f32x16 = vcvtdq2ps(a, ROUNDING);
14968 transmute(src:simd_select_bitmask(m:k, yes:r, no:f32x16::ZERO))
14969 }
14970}
14971
14972/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
14973///
14974/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14975/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14976/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14977/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14978/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14979/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14980///
14981/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepu32_ps&expand=1303)
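///
/// A minimal sketch of the unsigned variant (hypothetical helper; assumes nightly
/// `stdarch_x86_avx512` and AVX-512F):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn u32_to_f32(a: __m512i) -> __m512 {
///     // Illustrative wrapper: lanes are read as u32, so an all-ones lane
///     // converts to roughly 4.29e9 rather than -1.0.
///     _mm512_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a)
/// }
/// ```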
14982#[inline]
14983#[target_feature(enable = "avx512f")]
14984#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14985#[cfg_attr(test, assert_instr(vcvtudq2ps, ROUNDING = 8))]
14986#[rustc_legacy_const_generics(1)]
14987pub fn _mm512_cvt_roundepu32_ps<const ROUNDING: i32>(a: __m512i) -> __m512 {
14988 unsafe {
14989 static_assert_rounding!(ROUNDING);
14990 let a: u32x16 = a.as_u32x16();
14991 let r: f32x16 = vcvtudq2ps(a, ROUNDING);
14992 transmute(src:r)
14993 }
14994}
14995
14996/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14997///
14998/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14999/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15000/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15001/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15002/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15003/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15004///
15005/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepu32_ps&expand=1304)
15006#[inline]
15007#[target_feature(enable = "avx512f")]
15008#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15009#[cfg_attr(test, assert_instr(vcvtudq2ps, ROUNDING = 8))]
15010#[rustc_legacy_const_generics(3)]
15011pub fn _mm512_mask_cvt_roundepu32_ps<const ROUNDING: i32>(
15012 src: __m512,
15013 k: __mmask16,
15014 a: __m512i,
15015) -> __m512 {
15016 unsafe {
15017 static_assert_rounding!(ROUNDING);
15018 let a: u32x16 = a.as_u32x16();
15019 let r: f32x16 = vcvtudq2ps(a, ROUNDING);
15020 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_f32x16()))
15021 }
15022}
15023
15024/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15025///
15026/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
15027/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15028/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15029/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15030/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15031/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15032///
15033/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepu32_ps&expand=1305)
15034#[inline]
15035#[target_feature(enable = "avx512f")]
15036#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15037#[cfg_attr(test, assert_instr(vcvtudq2ps, ROUNDING = 8))]
15038#[rustc_legacy_const_generics(2)]
15039pub fn _mm512_maskz_cvt_roundepu32_ps<const ROUNDING: i32>(k: __mmask16, a: __m512i) -> __m512 {
15040 unsafe {
15041 static_assert_rounding!(ROUNDING);
15042 let a: u32x16 = a.as_u32x16();
15043 let r: f32x16 = vcvtudq2ps(a, ROUNDING);
15044 transmute(src:simd_select_bitmask(m:k, yes:r, no:f32x16::ZERO))
15045 }
15046}
15047
15048/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst.\
15049/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15050///
15051/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_ph&expand=1354)
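///
/// A minimal usage sketch (hypothetical helper; assumes nightly
/// `stdarch_x86_avx512` and AVX-512F):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn f32_to_f16_bits(a: __m512) -> __m256i {
///     // Illustrative wrapper: narrows each f32 lane to an IEEE 754 binary16 value;
///     // the 16 results come back as raw half-precision bit patterns in a __m256i.
///     _mm512_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(a)
/// }
/// ```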
15052#[inline]
15053#[target_feature(enable = "avx512f")]
15054#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15055#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))]
15056#[rustc_legacy_const_generics(1)]
15057pub fn _mm512_cvt_roundps_ph<const SAE: i32>(a: __m512) -> __m256i {
15058 unsafe {
15059 static_assert_sae!(SAE);
15060 let a: f32x16 = a.as_f32x16();
15061 let r: i16x16 = vcvtps2ph(a, SAE, src:i16x16::ZERO, mask:0b11111111_11111111);
15062 transmute(src:r)
15063 }
15064}
15065
15066/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15067/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15068///
15069/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_ph&expand=1355)
15070#[inline]
15071#[target_feature(enable = "avx512f")]
15072#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15073#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))]
15074#[rustc_legacy_const_generics(3)]
15075pub fn _mm512_mask_cvt_roundps_ph<const SAE: i32>(
15076 src: __m256i,
15077 k: __mmask16,
15078 a: __m512,
15079) -> __m256i {
15080 unsafe {
15081 static_assert_sae!(SAE);
15082 let a: f32x16 = a.as_f32x16();
15083 let src: i16x16 = src.as_i16x16();
15084 let r: i16x16 = vcvtps2ph(a, SAE, src, mask:k);
15085 transmute(src:r)
15086 }
15087}
15088
15089/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15090/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15091///
15092/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_ph&expand=1356)
15093#[inline]
15094#[target_feature(enable = "avx512f")]
15095#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15096#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))]
15097#[rustc_legacy_const_generics(2)]
15098pub fn _mm512_maskz_cvt_roundps_ph<const SAE: i32>(k: __mmask16, a: __m512) -> __m256i {
15099 unsafe {
15100 static_assert_sae!(SAE);
15101 let a: f32x16 = a.as_f32x16();
15102 let r: i16x16 = vcvtps2ph(a, SAE, src:i16x16::ZERO, mask:k);
15103 transmute(src:r)
15104 }
15105}
15106
15107/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15108/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15109/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15110/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15111/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15112/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15113/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15114///
15115/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvt_roundps_ph&expand=1352)
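///
/// A small masked-usage sketch (hypothetical helper; assumes nightly
/// `stdarch_x86_avx512` with AVX-512F and AVX-512VL):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f,avx512vl")]
/// fn truncate_low_lanes(src: __m128i, a: __m256) -> __m128i {
///     // Illustrative only: just the four lanes selected by the writemask are
///     // converted (truncating toward zero); the rest keep the bits from `src`.
///     _mm256_mask_cvt_roundps_ph::<_MM_FROUND_TO_ZERO>(src, 0b0000_1111, a)
/// }
/// ```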
15116#[inline]
15117#[target_feature(enable = "avx512f,avx512vl")]
15118#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15119#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15120#[rustc_legacy_const_generics(3)]
15121pub fn _mm256_mask_cvt_roundps_ph<const IMM8: i32>(
15122 src: __m128i,
15123 k: __mmask8,
15124 a: __m256,
15125) -> __m128i {
15126 unsafe {
15127 static_assert_uimm_bits!(IMM8, 8);
15128 let a: f32x8 = a.as_f32x8();
15129 let src: i16x8 = src.as_i16x8();
15130 let r: i16x8 = vcvtps2ph256(a, IMM8, src, mask:k);
15131 transmute(src:r)
15132 }
15133}
15134
15135/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15136/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15137/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15138/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15139/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15140/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15141/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15142///
15143/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvt_roundps_ph&expand=1353)
15144#[inline]
15145#[target_feature(enable = "avx512f,avx512vl")]
15146#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15147#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15148#[rustc_legacy_const_generics(2)]
15149pub fn _mm256_maskz_cvt_roundps_ph<const IMM8: i32>(k: __mmask8, a: __m256) -> __m128i {
15150 unsafe {
15151 static_assert_uimm_bits!(IMM8, 8);
15152 let a: f32x8 = a.as_f32x8();
15153 let r: i16x8 = vcvtps2ph256(a, IMM8, src:i16x8::ZERO, mask:k);
15154 transmute(src:r)
15155 }
15156}
15157
15158/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15159/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15160/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15161/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15162/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15163/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15164/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15165///
15166/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvt_roundps_ph&expand=1350)
15167#[inline]
15168#[target_feature(enable = "avx512f,avx512vl")]
15169#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15170#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15171#[rustc_legacy_const_generics(3)]
15172pub fn _mm_mask_cvt_roundps_ph<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
15173 unsafe {
15174 static_assert_uimm_bits!(IMM8, 8);
15175 let a: f32x4 = a.as_f32x4();
15176 let src: i16x8 = src.as_i16x8();
15177 let r: i16x8 = vcvtps2ph128(a, IMM8, src, mask:k);
15178 transmute(src:r)
15179 }
15180}
15181
15182/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15183/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15184/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15185/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15186/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15187/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15188/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15189///
15190/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvt_roundps_ph&expand=1351)
15191#[inline]
15192#[target_feature(enable = "avx512f,avx512vl")]
15193#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15194#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15195#[rustc_legacy_const_generics(2)]
15196pub fn _mm_maskz_cvt_roundps_ph<const IMM8: i32>(k: __mmask8, a: __m128) -> __m128i {
15197 unsafe {
15198 static_assert_uimm_bits!(IMM8, 8);
15199 let a: f32x4 = a.as_f32x4();
15200 let r: i16x8 = vcvtps2ph128(a, IMM8, src:i16x8::ZERO, mask:k);
15201 transmute(src:r)
15202 }
15203}
15204
15205/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst.\
15206/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15207///
15208/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_ph&expand=1778)
15209#[inline]
15210#[target_feature(enable = "avx512f")]
15211#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15212#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))]
15213#[rustc_legacy_const_generics(1)]
15214pub fn _mm512_cvtps_ph<const SAE: i32>(a: __m512) -> __m256i {
15215 unsafe {
15216 static_assert_sae!(SAE);
15217 let a: f32x16 = a.as_f32x16();
15218 let r: i16x16 = vcvtps2ph(a, SAE, src:i16x16::ZERO, mask:0b11111111_11111111);
15219 transmute(src:r)
15220 }
15221}
15222
15223/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15224/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15225///
15226/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_ph&expand=1779)
15227#[inline]
15228#[target_feature(enable = "avx512f")]
15229#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15230#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))]
15231#[rustc_legacy_const_generics(3)]
15232pub fn _mm512_mask_cvtps_ph<const SAE: i32>(src: __m256i, k: __mmask16, a: __m512) -> __m256i {
15233 unsafe {
15234 static_assert_sae!(SAE);
15235 let a: f32x16 = a.as_f32x16();
15236 let src: i16x16 = src.as_i16x16();
15237 let r: i16x16 = vcvtps2ph(a, SAE, src, mask:k);
15238 transmute(src:r)
15239 }
15240}
15241
15242/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15243/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15244///
15245/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_ph&expand=1780)
15246#[inline]
15247#[target_feature(enable = "avx512f")]
15248#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15249#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))]
15250#[rustc_legacy_const_generics(2)]
15251pub fn _mm512_maskz_cvtps_ph<const SAE: i32>(k: __mmask16, a: __m512) -> __m256i {
15252 unsafe {
15253 static_assert_sae!(SAE);
15254 let a: f32x16 = a.as_f32x16();
15255 let r: i16x16 = vcvtps2ph(a, SAE, src:i16x16::ZERO, mask:k);
15256 transmute(src:r)
15257 }
15258}
15259
15260/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15261/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15262/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
15263/// * [`_MM_FROUND_TO_NEG_INF`] : round down
15264/// * [`_MM_FROUND_TO_POS_INF`] : round up
15265/// * [`_MM_FROUND_TO_ZERO`] : truncate
15266/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15267///
15268/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_ph&expand=1776)
15269#[inline]
15270#[target_feature(enable = "avx512f,avx512vl")]
15271#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15272#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15273#[rustc_legacy_const_generics(3)]
15274pub fn _mm256_mask_cvtps_ph<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m256) -> __m128i {
15275 unsafe {
15276 static_assert_uimm_bits!(IMM8, 8);
15277 let a: f32x8 = a.as_f32x8();
15278 let src: i16x8 = src.as_i16x8();
15279 let r: i16x8 = vcvtps2ph256(a, IMM8, src, mask:k);
15280 transmute(src:r)
15281 }
15282}
15283
15284/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15285/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15286/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
15287/// * [`_MM_FROUND_TO_NEG_INF`] : round down
15288/// * [`_MM_FROUND_TO_POS_INF`] : round up
15289/// * [`_MM_FROUND_TO_ZERO`] : truncate
15290/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15291///
15292/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_ph&expand=1777)
15293#[inline]
15294#[target_feature(enable = "avx512f,avx512vl")]
15295#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15296#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15297#[rustc_legacy_const_generics(2)]
15298pub fn _mm256_maskz_cvtps_ph<const IMM8: i32>(k: __mmask8, a: __m256) -> __m128i {
15299 unsafe {
15300 static_assert_uimm_bits!(IMM8, 8);
15301 let a: f32x8 = a.as_f32x8();
15302 let r: i16x8 = vcvtps2ph256(a, IMM8, src:i16x8::ZERO, mask:k);
15303 transmute(src:r)
15304 }
15305}
15306
15307/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15308/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15309/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
15310/// * [`_MM_FROUND_TO_NEG_INF`] : round down
15311/// * [`_MM_FROUND_TO_POS_INF`] : round up
15312/// * [`_MM_FROUND_TO_ZERO`] : truncate
15313/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15314///
15315/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_ph&expand=1773)
15316#[inline]
15317#[target_feature(enable = "avx512f,avx512vl")]
15318#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15319#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15320#[rustc_legacy_const_generics(3)]
15321pub fn _mm_mask_cvtps_ph<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
15322 unsafe {
15323 static_assert_uimm_bits!(IMM8, 8);
15324 let a: f32x4 = a.as_f32x4();
15325 let src: i16x8 = src.as_i16x8();
15326 let r: i16x8 = vcvtps2ph128(a, IMM8, src, mask:k);
15327 transmute(src:r)
15328 }
15329}
15330
15331/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15332/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15333/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
15334/// * [`_MM_FROUND_TO_NEG_INF`] : round down
15335/// * [`_MM_FROUND_TO_POS_INF`] : round up
15336/// * [`_MM_FROUND_TO_ZERO`] : truncate
15337/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15338///
15339/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_ph&expand=1774)
15340#[inline]
15341#[target_feature(enable = "avx512f,avx512vl")]
15342#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15343#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15344#[rustc_legacy_const_generics(2)]
15345pub fn _mm_maskz_cvtps_ph<const IMM8: i32>(k: __mmask8, a: __m128) -> __m128i {
15346 unsafe {
15347 static_assert_uimm_bits!(IMM8, 8);
15348 let a: f32x4 = a.as_f32x4();
15349 let r: i16x8 = vcvtps2ph128(a, IMM8, src:i16x8::ZERO, mask:k);
15350 transmute(src:r)
15351 }
15352}
15353
15354/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
15355/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15356///
15357/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundph_ps&expand=1332)
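///
/// A minimal usage sketch (hypothetical helper; assumes nightly
/// `stdarch_x86_avx512` and AVX-512F):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn f16_bits_to_f32(a: __m256i) -> __m512 {
///     // Illustrative wrapper: widens 16 packed half-precision bit patterns to
///     // f32. The conversion is exact, so only exception suppression is tunable.
///     _mm512_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(a)
/// }
/// ```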
15358#[inline]
15359#[target_feature(enable = "avx512f")]
15360#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15361#[cfg_attr(test, assert_instr(vcvtph2ps, SAE = 8))]
15362#[rustc_legacy_const_generics(1)]
15363pub fn _mm512_cvt_roundph_ps<const SAE: i32>(a: __m256i) -> __m512 {
15364 unsafe {
15365 static_assert_sae!(SAE);
15366 let a: i16x16 = a.as_i16x16();
15367 let r: f32x16 = vcvtph2ps(a, src:f32x16::ZERO, mask:0b11111111_11111111, SAE);
15368 transmute(src:r)
15369 }
15370}
15371
15372/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15373/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15374///
15375/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundph_ps&expand=1333)
15376#[inline]
15377#[target_feature(enable = "avx512f")]
15378#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15379#[cfg_attr(test, assert_instr(vcvtph2ps, SAE = 8))]
15380#[rustc_legacy_const_generics(3)]
15381pub fn _mm512_mask_cvt_roundph_ps<const SAE: i32>(src: __m512, k: __mmask16, a: __m256i) -> __m512 {
15382 unsafe {
15383 static_assert_sae!(SAE);
15384 let a: i16x16 = a.as_i16x16();
15385 let src: f32x16 = src.as_f32x16();
15386 let r: f32x16 = vcvtph2ps(a, src, mask:k, SAE);
15387 transmute(src:r)
15388 }
15389}
15390
15391/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15392/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15393///
15394/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundph_ps&expand=1334)
15395#[inline]
15396#[target_feature(enable = "avx512f")]
15397#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15398#[cfg_attr(test, assert_instr(vcvtph2ps, SAE = 8))]
15399#[rustc_legacy_const_generics(2)]
15400pub fn _mm512_maskz_cvt_roundph_ps<const SAE: i32>(k: __mmask16, a: __m256i) -> __m512 {
15401 unsafe {
15402 static_assert_sae!(SAE);
15403 let a: i16x16 = a.as_i16x16();
15404 let r: f32x16 = vcvtph2ps(a, src:f32x16::ZERO, mask:k, SAE);
15405 transmute(src:r)
15406 }
15407}
15408
15409/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
15410///
15411/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtph_ps&expand=1723)
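///
/// A brief round-trip sketch pairing this with [`_mm512_cvtps_ph`] (hypothetical
/// helper; assumes nightly `stdarch_x86_avx512` and AVX-512F):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn through_f16(a: __m512) -> __m512 {
///     // Illustrative only: f32 -> f16 loses precision, so converting back yields
///     // the nearest representable half-precision values, not the original inputs.
///     let half_bits = _mm512_cvtps_ph::<_MM_FROUND_NO_EXC>(a);
///     _mm512_cvtph_ps(half_bits)
/// }
/// ```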
15412#[inline]
15413#[target_feature(enable = "avx512f")]
15414#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15415#[cfg_attr(test, assert_instr(vcvtph2ps))]
15416pub fn _mm512_cvtph_ps(a: __m256i) -> __m512 {
15417 unsafe {
15418 transmute(src:vcvtph2ps(
15419 a.as_i16x16(),
15420 src:f32x16::ZERO,
15421 mask:0b11111111_11111111,
15422 _MM_FROUND_NO_EXC,
15423 ))
15424 }
15425}
15426
15427/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15428///
15429/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtph_ps&expand=1724)
15430#[inline]
15431#[target_feature(enable = "avx512f")]
15432#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15433#[cfg_attr(test, assert_instr(vcvtph2ps))]
15434pub fn _mm512_mask_cvtph_ps(src: __m512, k: __mmask16, a: __m256i) -> __m512 {
15435 unsafe {
15436 transmute(src:vcvtph2ps(
15437 a.as_i16x16(),
15438 src.as_f32x16(),
15439 mask:k,
15440 _MM_FROUND_NO_EXC,
15441 ))
15442 }
15443}
15444
15445/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15446///
15447/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtph_ps&expand=1725)
15448#[inline]
15449#[target_feature(enable = "avx512f")]
15450#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15451#[cfg_attr(test, assert_instr(vcvtph2ps))]
15452pub fn _mm512_maskz_cvtph_ps(k: __mmask16, a: __m256i) -> __m512 {
15453 unsafe { transmute(src:vcvtph2ps(a.as_i16x16(), src:f32x16::ZERO, mask:k, _MM_FROUND_NO_EXC)) }
15454}
15455
15456/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15457///
15458/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtph_ps&expand=1721)
15459#[inline]
15460#[target_feature(enable = "avx512f,avx512vl")]
15461#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15462#[cfg_attr(test, assert_instr(vcvtph2ps))]
15463pub fn _mm256_mask_cvtph_ps(src: __m256, k: __mmask8, a: __m128i) -> __m256 {
15464 unsafe {
15465 let convert: __m256 = _mm256_cvtph_ps(a);
15466 transmute(src:simd_select_bitmask(m:k, yes:convert.as_f32x8(), no:src.as_f32x8()))
15467 }
15468}
15469
15470/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15471///
15472/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtph_ps&expand=1722)
15473#[inline]
15474#[target_feature(enable = "avx512f,avx512vl")]
15475#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15476#[cfg_attr(test, assert_instr(vcvtph2ps))]
15477pub fn _mm256_maskz_cvtph_ps(k: __mmask8, a: __m128i) -> __m256 {
15478 unsafe {
15479 let convert: __m256 = _mm256_cvtph_ps(a);
15480 transmute(src:simd_select_bitmask(m:k, yes:convert.as_f32x8(), no:f32x8::ZERO))
15481 }
15482}
15483
15484/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15485///
15486/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtph_ps&expand=1718)
15487#[inline]
15488#[target_feature(enable = "avx512f,avx512vl")]
15489#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15490#[cfg_attr(test, assert_instr(vcvtph2ps))]
15491pub fn _mm_mask_cvtph_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 {
15492 unsafe {
15493 let convert: __m128 = _mm_cvtph_ps(a);
15494 transmute(src:simd_select_bitmask(m:k, yes:convert.as_f32x4(), no:src.as_f32x4()))
15495 }
15496}
15497
15498/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15499///
15500/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtph_ps&expand=1719)
15501#[inline]
15502#[target_feature(enable = "avx512f,avx512vl")]
15503#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15504#[cfg_attr(test, assert_instr(vcvtph2ps))]
15505pub fn _mm_maskz_cvtph_ps(k: __mmask8, a: __m128i) -> __m128 {
15506 unsafe {
15507 let convert: __m128 = _mm_cvtph_ps(a);
15508 transmute(src:simd_select_bitmask(m:k, yes:convert.as_f32x4(), no:f32x4::ZERO))
15509 }
15510}
15511
15512/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.\
15513/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15514///
15515/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundps_epi32&expand=1916)
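///
/// A minimal usage sketch (hypothetical helper; assumes nightly
/// `stdarch_x86_avx512` and AVX-512F):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn truncate_to_i32(a: __m512) -> __m512i {
///     // Illustrative wrapper: `cvtt` always truncates toward zero (e.g. -1.9 -> -1);
///     // the SAE parameter only controls whether exceptions are suppressed.
///     _mm512_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(a)
/// }
/// ```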
15516#[inline]
15517#[target_feature(enable = "avx512f")]
15518#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15519#[cfg_attr(test, assert_instr(vcvttps2dq, SAE = 8))]
15520#[rustc_legacy_const_generics(1)]
15521pub fn _mm512_cvtt_roundps_epi32<const SAE: i32>(a: __m512) -> __m512i {
15522 unsafe {
15523 static_assert_sae!(SAE);
15524 let a: f32x16 = a.as_f32x16();
15525 let r: i32x16 = vcvttps2dq(a, src:i32x16::ZERO, mask:0b11111111_11111111, SAE);
15526 transmute(src:r)
15527 }
15528}
15529
15530/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15531/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15532///
15533/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundps_epi32&expand=1917)
15534#[inline]
15535#[target_feature(enable = "avx512f")]
15536#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15537#[cfg_attr(test, assert_instr(vcvttps2dq, SAE = 8))]
15538#[rustc_legacy_const_generics(3)]
15539pub fn _mm512_mask_cvtt_roundps_epi32<const SAE: i32>(
15540 src: __m512i,
15541 k: __mmask16,
15542 a: __m512,
15543) -> __m512i {
15544 unsafe {
15545 static_assert_sae!(SAE);
15546 let a: f32x16 = a.as_f32x16();
15547 let src: i32x16 = src.as_i32x16();
15548 let r: i32x16 = vcvttps2dq(a, src, mask:k, SAE);
15549 transmute(src:r)
15550 }
15551}
15552
15553/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15554/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15555///
15556/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundps_epi32&expand=1918)
15557#[inline]
15558#[target_feature(enable = "avx512f")]
15559#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15560#[cfg_attr(test, assert_instr(vcvttps2dq, SAE = 8))]
15561#[rustc_legacy_const_generics(2)]
15562pub fn _mm512_maskz_cvtt_roundps_epi32<const SAE: i32>(k: __mmask16, a: __m512) -> __m512i {
15563 unsafe {
15564 static_assert_sae!(SAE);
15565 let a: f32x16 = a.as_f32x16();
15566 let r: i32x16 = vcvttps2dq(a, src:i32x16::ZERO, mask:k, SAE);
15567 transmute(src:r)
15568 }
15569}
15570
15571/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.\
15572/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15573///
15574/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundps_epu32&expand=1922)
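///
/// A minimal usage sketch of the unsigned truncation (hypothetical helper; assumes
/// nightly `stdarch_x86_avx512` and AVX-512F):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn truncate_to_u32(a: __m512) -> __m512i {
///     // Illustrative wrapper: negative or out-of-range lanes cannot be
///     // represented as u32 and yield the unsigned indefinite value (all bits set).
///     _mm512_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(a)
/// }
/// ```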
15575#[inline]
15576#[target_feature(enable = "avx512f")]
15577#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15578#[cfg_attr(test, assert_instr(vcvttps2udq, SAE = 8))]
15579#[rustc_legacy_const_generics(1)]
15580pub fn _mm512_cvtt_roundps_epu32<const SAE: i32>(a: __m512) -> __m512i {
15581 unsafe {
15582 static_assert_sae!(SAE);
15583 let a: f32x16 = a.as_f32x16();
15584 let r: u32x16 = vcvttps2udq(a, src:u32x16::ZERO, mask:0b11111111_11111111, SAE);
15585 transmute(src:r)
15586 }
15587}
15588
15589/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15590/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15591///
15592/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundps_epu32&expand=1923)
15593#[inline]
15594#[target_feature(enable = "avx512f")]
15595#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15596#[cfg_attr(test, assert_instr(vcvttps2udq, SAE = 8))]
15597#[rustc_legacy_const_generics(3)]
15598pub fn _mm512_mask_cvtt_roundps_epu32<const SAE: i32>(
15599 src: __m512i,
15600 k: __mmask16,
15601 a: __m512,
15602) -> __m512i {
15603 unsafe {
15604 static_assert_sae!(SAE);
15605 let a: f32x16 = a.as_f32x16();
15606 let src: u32x16 = src.as_u32x16();
15607 let r: u32x16 = vcvttps2udq(a, src, mask:k, SAE);
15608 transmute(src:r)
15609 }
15610}
15611
15612/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15613/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15614///
15615/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundps_epu32&expand=1924)
15616#[inline]
15617#[target_feature(enable = "avx512f")]
15618#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15619#[cfg_attr(test, assert_instr(vcvttps2udq, SAE = 8))]
15620#[rustc_legacy_const_generics(2)]
15621pub fn _mm512_maskz_cvtt_roundps_epu32<const SAE: i32>(k: __mmask16, a: __m512) -> __m512i {
15622 unsafe {
15623 static_assert_sae!(SAE);
15624 let a: f32x16 = a.as_f32x16();
15625 let r: u32x16 = vcvttps2udq(a, src:u32x16::ZERO, mask:k, SAE);
15626 transmute(src:r)
15627 }
15628}
15629
15630/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.\
15631/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15632///
15633/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundpd_epi32&expand=1904)
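///
/// A minimal usage sketch (hypothetical helper; assumes nightly
/// `stdarch_x86_avx512` and AVX-512F):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn truncate_f64_to_i32(a: __m512d) -> __m256i {
///     // Illustrative wrapper: eight f64 lanes narrow to eight truncated i32
///     // lanes, so the result fits in a 256-bit vector rather than a 512-bit one.
///     _mm512_cvtt_roundpd_epi32::<_MM_FROUND_NO_EXC>(a)
/// }
/// ```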
15634#[inline]
15635#[target_feature(enable = "avx512f")]
15636#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15637#[cfg_attr(test, assert_instr(vcvttpd2dq, SAE = 8))]
15638#[rustc_legacy_const_generics(1)]
15639pub fn _mm512_cvtt_roundpd_epi32<const SAE: i32>(a: __m512d) -> __m256i {
15640 unsafe {
15641 static_assert_sae!(SAE);
15642 let a: f64x8 = a.as_f64x8();
15643 let r: i32x8 = vcvttpd2dq(a, src:i32x8::ZERO, mask:0b11111111, SAE);
15644 transmute(src:r)
15645 }
15646}
15647
15648/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15649/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15650///
15651/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundpd_epi32&expand=1905)
15652#[inline]
15653#[target_feature(enable = "avx512f")]
15654#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15655#[cfg_attr(test, assert_instr(vcvttpd2dq, SAE = 8))]
15656#[rustc_legacy_const_generics(3)]
15657pub fn _mm512_mask_cvtt_roundpd_epi32<const SAE: i32>(
15658 src: __m256i,
15659 k: __mmask8,
15660 a: __m512d,
15661) -> __m256i {
15662 unsafe {
15663 static_assert_sae!(SAE);
15664 let a: f64x8 = a.as_f64x8();
15665 let src: i32x8 = src.as_i32x8();
15666 let r: i32x8 = vcvttpd2dq(a, src, mask:k, SAE);
15667 transmute(src:r)
15668 }
15669}
15670
15671/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15672/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15673///
15674/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundpd_epi32&expand=1918)
15675#[inline]
15676#[target_feature(enable = "avx512f")]
15677#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15678#[cfg_attr(test, assert_instr(vcvttpd2dq, SAE = 8))]
15679#[rustc_legacy_const_generics(2)]
15680pub fn _mm512_maskz_cvtt_roundpd_epi32<const SAE: i32>(k: __mmask8, a: __m512d) -> __m256i {
15681 unsafe {
15682 static_assert_sae!(SAE);
15683 let a: f64x8 = a.as_f64x8();
15684 let r: i32x8 = vcvttpd2dq(a, src:i32x8::ZERO, mask:k, SAE);
15685 transmute(src:r)
15686 }
15687}
15688
15689/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.\
15690/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15691///
15692/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundpd_epu32&expand=1910)
15693#[inline]
15694#[target_feature(enable = "avx512f")]
15695#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15696#[cfg_attr(test, assert_instr(vcvttpd2udq, SAE = 8))]
15697#[rustc_legacy_const_generics(1)]
15698pub fn _mm512_cvtt_roundpd_epu32<const SAE: i32>(a: __m512d) -> __m256i {
15699 unsafe {
15700 static_assert_sae!(SAE);
15701 let a: f64x8 = a.as_f64x8();
15702 let r: u32x8 = vcvttpd2udq(a, src:i32x8::ZERO, mask:0b11111111, SAE);
15703 transmute(src:r)
15704 }
15705}
15706
15707/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15708/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15709///
15710/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundpd_epu32&expand=1911)
15711#[inline]
15712#[target_feature(enable = "avx512f")]
15713#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15714#[cfg_attr(test, assert_instr(vcvttpd2udq, SAE = 8))]
15715#[rustc_legacy_const_generics(3)]
15716pub fn _mm512_mask_cvtt_roundpd_epu32<const SAE: i32>(
15717 src: __m256i,
15718 k: __mmask8,
15719 a: __m512d,
15720) -> __m256i {
15721 unsafe {
15722 static_assert_sae!(SAE);
15723 let a: f64x8 = a.as_f64x8();
15724 let src: i32x8 = src.as_i32x8();
15725 let r: u32x8 = vcvttpd2udq(a, src, mask:k, SAE);
15726 transmute(src:r)
15727 }
15728}
15729
15730/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.
15731///
15732/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttps_epi32&expand=1984)
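///
/// A minimal usage sketch (the scalar helper is hypothetical; assumes nightly
/// `stdarch_x86_avx512` and AVX-512F):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn truncate_scalar(x: f32) -> i32 {
///     // Illustrative only: truncation drops the fractional part toward zero in
///     // every lane, e.g. 2.7 -> 2 and -2.7 -> -2.
///     let v = _mm512_cvttps_epi32(_mm512_set1_ps(x));
///     _mm512_cvtsi512_si32(v)
/// }
/// ```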
15733#[inline]
15734#[target_feature(enable = "avx512f")]
15735#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15736#[cfg_attr(test, assert_instr(vcvttps2dq))]
15737pub fn _mm512_cvttps_epi32(a: __m512) -> __m512i {
15738 unsafe {
15739 transmute(src:vcvttps2dq(
15740 a.as_f32x16(),
15741 src:i32x16::ZERO,
15742 mask:0b11111111_11111111,
15743 _MM_FROUND_CUR_DIRECTION,
15744 ))
15745 }
15746}
15747
15748/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15749///
15750/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttps_epi32&expand=1985)
15751#[inline]
15752#[target_feature(enable = "avx512f")]
15753#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15754#[cfg_attr(test, assert_instr(vcvttps2dq))]
15755pub fn _mm512_mask_cvttps_epi32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
15756 unsafe {
15757 transmute(src:vcvttps2dq(
15758 a.as_f32x16(),
15759 src.as_i32x16(),
15760 mask:k,
15761 _MM_FROUND_CUR_DIRECTION,
15762 ))
15763 }
15764}
15765
15766/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15767///
15768/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttps_epi32&expand=1986)
15769#[inline]
15770#[target_feature(enable = "avx512f")]
15771#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15772#[cfg_attr(test, assert_instr(vcvttps2dq))]
15773pub fn _mm512_maskz_cvttps_epi32(k: __mmask16, a: __m512) -> __m512i {
15774 unsafe {
15775 transmute(src:vcvttps2dq(
15776 a.as_f32x16(),
15777 src:i32x16::ZERO,
15778 mask:k,
15779 _MM_FROUND_CUR_DIRECTION,
15780 ))
15781 }
15782}
15783
15784/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15785///
15786/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttps_epi32&expand=1982)
15787#[inline]
15788#[target_feature(enable = "avx512f,avx512vl")]
15789#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15790#[cfg_attr(test, assert_instr(vcvttps2dq))]
15791pub fn _mm256_mask_cvttps_epi32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
15792 unsafe { transmute(src:vcvttps2dq256(a.as_f32x8(), src.as_i32x8(), mask:k)) }
15793}
15794
15795/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15796///
15797/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttps_epi32&expand=1983)
15798#[inline]
15799#[target_feature(enable = "avx512f,avx512vl")]
15800#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15801#[cfg_attr(test, assert_instr(vcvttps2dq))]
15802pub fn _mm256_maskz_cvttps_epi32(k: __mmask8, a: __m256) -> __m256i {
15803 unsafe { transmute(src:vcvttps2dq256(a.as_f32x8(), src:i32x8::ZERO, mask:k)) }
15804}
15805
15806/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15807///
15808/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttps_epi32&expand=1979)
15809#[inline]
15810#[target_feature(enable = "avx512f,avx512vl")]
15811#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15812#[cfg_attr(test, assert_instr(vcvttps2dq))]
15813pub fn _mm_mask_cvttps_epi32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
15814 unsafe { transmute(src:vcvttps2dq128(a.as_f32x4(), src.as_i32x4(), mask:k)) }
15815}
15816
15817/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15818///
15819/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttps_epi32&expand=1980)
15820#[inline]
15821#[target_feature(enable = "avx512f,avx512vl")]
15822#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15823#[cfg_attr(test, assert_instr(vcvttps2dq))]
15824pub fn _mm_maskz_cvttps_epi32(k: __mmask8, a: __m128) -> __m128i {
15825 unsafe { transmute(src:vcvttps2dq128(a.as_f32x4(), src:i32x4::ZERO, mask:k)) }
15826}
15827
15828/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
15829///
15830/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttps_epu32&expand=2002)
15831#[inline]
15832#[target_feature(enable = "avx512f")]
15833#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15834#[cfg_attr(test, assert_instr(vcvttps2udq))]
15835pub fn _mm512_cvttps_epu32(a: __m512) -> __m512i {
15836 unsafe {
15837 transmute(src:vcvttps2udq(
15838 a.as_f32x16(),
15839 src:u32x16::ZERO,
15840 mask:0b11111111_11111111,
15841 _MM_FROUND_CUR_DIRECTION,
15842 ))
15843 }
15844}
15845
15846/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15847///
15848/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttps_epu32&expand=2003)
15849#[inline]
15850#[target_feature(enable = "avx512f")]
15851#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15852#[cfg_attr(test, assert_instr(vcvttps2udq))]
15853pub fn _mm512_mask_cvttps_epu32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
15854 unsafe {
15855 transmute(src:vcvttps2udq(
15856 a.as_f32x16(),
15857 src.as_u32x16(),
15858 mask:k,
15859 _MM_FROUND_CUR_DIRECTION,
15860 ))
15861 }
15862}
15863
15864/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15865///
15866/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttps_epu32&expand=2004)
15867#[inline]
15868#[target_feature(enable = "avx512f")]
15869#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15870#[cfg_attr(test, assert_instr(vcvttps2udq))]
15871pub fn _mm512_maskz_cvttps_epu32(k: __mmask16, a: __m512) -> __m512i {
15872 unsafe {
15873 transmute(src:vcvttps2udq(
15874 a.as_f32x16(),
15875 src:u32x16::ZERO,
15876 mask:k,
15877 _MM_FROUND_CUR_DIRECTION,
15878 ))
15879 }
15880}
15881
15882/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
15883///
15884/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttps_epu32&expand=1999)
15885#[inline]
15886#[target_feature(enable = "avx512f,avx512vl")]
15887#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15888#[cfg_attr(test, assert_instr(vcvttps2udq))]
15889pub fn _mm256_cvttps_epu32(a: __m256) -> __m256i {
15890 unsafe { transmute(src:vcvttps2udq256(a.as_f32x8(), src:u32x8::ZERO, mask:0b11111111)) }
15891}
15892
15893/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15894///
15895/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttps_epu32&expand=2000)
15896#[inline]
15897#[target_feature(enable = "avx512f,avx512vl")]
15898#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15899#[cfg_attr(test, assert_instr(vcvttps2udq))]
15900pub fn _mm256_mask_cvttps_epu32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
15901 unsafe { transmute(src:vcvttps2udq256(a.as_f32x8(), src.as_u32x8(), mask:k)) }
15902}
15903
15904/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15905///
15906/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttps_epu32&expand=2001)
15907#[inline]
15908#[target_feature(enable = "avx512f,avx512vl")]
15909#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15910#[cfg_attr(test, assert_instr(vcvttps2udq))]
15911pub fn _mm256_maskz_cvttps_epu32(k: __mmask8, a: __m256) -> __m256i {
15912 unsafe { transmute(src:vcvttps2udq256(a.as_f32x8(), src:u32x8::ZERO, mask:k)) }
15913}
15914
15915/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
15916///
15917/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttps_epu32&expand=1996)
15918#[inline]
15919#[target_feature(enable = "avx512f,avx512vl")]
15920#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15921#[cfg_attr(test, assert_instr(vcvttps2udq))]
15922pub fn _mm_cvttps_epu32(a: __m128) -> __m128i {
15923 unsafe { transmute(src:vcvttps2udq128(a.as_f32x4(), src:u32x4::ZERO, mask:0b11111111)) }
15924}
15925
15926/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15927///
15928/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttps_epu32&expand=1997)
15929#[inline]
15930#[target_feature(enable = "avx512f,avx512vl")]
15931#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15932#[cfg_attr(test, assert_instr(vcvttps2udq))]
15933pub fn _mm_mask_cvttps_epu32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
15934 unsafe { transmute(src:vcvttps2udq128(a.as_f32x4(), src.as_u32x4(), mask:k)) }
15935}
15936
15937/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15938///
15939/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttps_epu32&expand=1998)
15940#[inline]
15941#[target_feature(enable = "avx512f,avx512vl")]
15942#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15943#[cfg_attr(test, assert_instr(vcvttps2udq))]
15944pub fn _mm_maskz_cvttps_epu32(k: __mmask8, a: __m128) -> __m128i {
15945 unsafe { transmute(src:vcvttps2udq128(a.as_f32x4(), src:u32x4::ZERO, mask:k)) }
15946}
15947
15948/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15949/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15950///
15951/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundpd_epu32&expand=1912)
15952#[inline]
15953#[target_feature(enable = "avx512f")]
15954#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15955#[cfg_attr(test, assert_instr(vcvttpd2udq, SAE = 8))]
15956#[rustc_legacy_const_generics(2)]
15957pub fn _mm512_maskz_cvtt_roundpd_epu32<const SAE: i32>(k: __mmask8, a: __m512d) -> __m256i {
15958 unsafe {
15959 static_assert_sae!(SAE);
15960 let a: f64x8 = a.as_f64x8();
15961 let r: u32x8 = vcvttpd2udq(a, src:i32x8::ZERO, mask:k, SAE);
15962 transmute(src:r)
15963 }
15964}
15965
15966/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.
15967///
15968/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttpd_epi32&expand=1947)
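///
/// A minimal usage sketch (hypothetical helper; assumes nightly
/// `stdarch_x86_avx512` and AVX-512F):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn truncate_doubles(a: __m512d) -> __m256i {
///     // Illustrative wrapper: like the `_round` variant but without an explicit
///     // SAE parameter, so the current MXCSR exception behavior applies while the
///     // values themselves still truncate toward zero.
///     _mm512_cvttpd_epi32(a)
/// }
/// ```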
15969#[inline]
15970#[target_feature(enable = "avx512f")]
15971#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15972#[cfg_attr(test, assert_instr(vcvttpd2dq))]
15973pub fn _mm512_cvttpd_epi32(a: __m512d) -> __m256i {
15974 unsafe {
15975 transmute(src:vcvttpd2dq(
15976 a.as_f64x8(),
15977 src:i32x8::ZERO,
15978 mask:0b11111111,
15979 _MM_FROUND_CUR_DIRECTION,
15980 ))
15981 }
15982}
15983
15984/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15985///
15986/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttpd_epi32&expand=1948)
15987#[inline]
15988#[target_feature(enable = "avx512f")]
15989#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15990#[cfg_attr(test, assert_instr(vcvttpd2dq))]
15991pub fn _mm512_mask_cvttpd_epi32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
15992 unsafe {
        transmute(vcvttpd2dq(
            a.as_f64x8(),
            src.as_i32x8(),
            k,
15997 _MM_FROUND_CUR_DIRECTION,
15998 ))
15999 }
16000}
16001
16002/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16003///
16004/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttpd_epi32&expand=1949)
16005#[inline]
16006#[target_feature(enable = "avx512f")]
16007#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16008#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16009pub fn _mm512_maskz_cvttpd_epi32(k: __mmask8, a: __m512d) -> __m256i {
16010 unsafe {
        transmute(vcvttpd2dq(
            a.as_f64x8(),
            i32x8::ZERO,
            k,
16015 _MM_FROUND_CUR_DIRECTION,
16016 ))
16017 }
16018}
16019
16020/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16021///
16022/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttpd_epi32&expand=1945)
16023#[inline]
16024#[target_feature(enable = "avx512f,avx512vl")]
16025#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16026#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16027pub fn _mm256_mask_cvttpd_epi32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
    unsafe { transmute(vcvttpd2dq256(a.as_f64x4(), src.as_i32x4(), k)) }
16029}
16030
16031/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16032///
16033/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttpd_epi32&expand=1946)
16034#[inline]
16035#[target_feature(enable = "avx512f,avx512vl")]
16036#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16037#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16038pub fn _mm256_maskz_cvttpd_epi32(k: __mmask8, a: __m256d) -> __m128i {
    unsafe { transmute(vcvttpd2dq256(a.as_f64x4(), i32x4::ZERO, k)) }
16040}
16041
16042/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16043///
16044/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttpd_epi32&expand=1942)
16045#[inline]
16046#[target_feature(enable = "avx512f,avx512vl")]
16047#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16048#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16049pub fn _mm_mask_cvttpd_epi32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
    unsafe { transmute(vcvttpd2dq128(a.as_f64x2(), src.as_i32x4(), k)) }
16051}
16052
16053/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16054///
16055/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttpd_epi32&expand=1943)
16056#[inline]
16057#[target_feature(enable = "avx512f,avx512vl")]
16058#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16059#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16060pub fn _mm_maskz_cvttpd_epi32(k: __mmask8, a: __m128d) -> __m128i {
    unsafe { transmute(vcvttpd2dq128(a.as_f64x2(), i32x4::ZERO, k)) }
16062}
16063
16064/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
16065///
16066/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttpd_epu32&expand=1965)
16067#[inline]
16068#[target_feature(enable = "avx512f")]
16069#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16070#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16071pub fn _mm512_cvttpd_epu32(a: __m512d) -> __m256i {
16072 unsafe {
        transmute(vcvttpd2udq(
            a.as_f64x8(),
            i32x8::ZERO,
            0b11111111,
16077 _MM_FROUND_CUR_DIRECTION,
16078 ))
16079 }
16080}
16081
16082/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16083///
16084/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttpd_epu32&expand=1966)
16085#[inline]
16086#[target_feature(enable = "avx512f")]
16087#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16088#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16089pub fn _mm512_mask_cvttpd_epu32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
16090 unsafe {
        transmute(vcvttpd2udq(
            a.as_f64x8(),
            src.as_i32x8(),
            k,
16095 _MM_FROUND_CUR_DIRECTION,
16096 ))
16097 }
16098}
16099
16100/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16101///
16102/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttpd_epu32&expand=1967)
16103#[inline]
16104#[target_feature(enable = "avx512f")]
16105#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16106#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16107pub fn _mm512_maskz_cvttpd_epu32(k: __mmask8, a: __m512d) -> __m256i {
16108 unsafe {
        transmute(vcvttpd2udq(
            a.as_f64x8(),
            i32x8::ZERO,
            k,
16113 _MM_FROUND_CUR_DIRECTION,
16114 ))
16115 }
16116}
16117
16118/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
16119///
16120/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttpd_epu32&expand=1962)
16121#[inline]
16122#[target_feature(enable = "avx512f,avx512vl")]
16123#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16124#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16125pub fn _mm256_cvttpd_epu32(a: __m256d) -> __m128i {
    unsafe { transmute(vcvttpd2udq256(a.as_f64x4(), i32x4::ZERO, 0b11111111)) }
16127}
16128
16129/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16130///
16131/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttpd_epu32&expand=1963)
16132#[inline]
16133#[target_feature(enable = "avx512f,avx512vl")]
16134#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16135#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16136pub fn _mm256_mask_cvttpd_epu32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
    unsafe { transmute(vcvttpd2udq256(a.as_f64x4(), src.as_i32x4(), k)) }
16138}
16139
16140/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16141///
16142/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttpd_epu32&expand=1964)
16143#[inline]
16144#[target_feature(enable = "avx512f,avx512vl")]
16145#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16146#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16147pub fn _mm256_maskz_cvttpd_epu32(k: __mmask8, a: __m256d) -> __m128i {
    unsafe { transmute(vcvttpd2udq256(a.as_f64x4(), i32x4::ZERO, k)) }
16149}
16150
16151/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
16152///
16153/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttpd_epu32&expand=1959)
16154#[inline]
16155#[target_feature(enable = "avx512f,avx512vl")]
16156#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16157#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16158pub fn _mm_cvttpd_epu32(a: __m128d) -> __m128i {
    unsafe { transmute(vcvttpd2udq128(a.as_f64x2(), i32x4::ZERO, 0b11111111)) }
16160}
16161
16162/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16163///
16164/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttpd_epu32&expand=1960)
16165#[inline]
16166#[target_feature(enable = "avx512f,avx512vl")]
16167#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16168#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16169pub fn _mm_mask_cvttpd_epu32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
    unsafe { transmute(vcvttpd2udq128(a.as_f64x2(), src.as_i32x4(), k)) }
16171}
16172
16173/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16174///
16175/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttpd_epu32&expand=1961)
16176#[inline]
16177#[target_feature(enable = "avx512f,avx512vl")]
16178#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16179#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16180pub fn _mm_maskz_cvttpd_epu32(k: __mmask8, a: __m128d) -> __m128i {
    unsafe { transmute(vcvttpd2udq128(a.as_f64x2(), i32x4::ZERO, k)) }
16182}
16183
16184/// Returns vector of type `__m512d` with all elements set to zero.
16185///
16186/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_pd&expand=5018)
16187#[inline]
16188#[target_feature(enable = "avx512f")]
16189#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16190#[cfg_attr(test, assert_instr(vxorps))]
16191pub fn _mm512_setzero_pd() -> __m512d {
16192 // All-0 is a properly initialized __m512d
16193 unsafe { const { mem::zeroed() } }
16194}
16195
16196/// Returns vector of type `__m512` with all elements set to zero.
16197///
16198/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_ps&expand=5021)
16199#[inline]
16200#[target_feature(enable = "avx512f")]
16201#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16202#[cfg_attr(test, assert_instr(vxorps))]
16203pub fn _mm512_setzero_ps() -> __m512 {
16204 // All-0 is a properly initialized __m512
16205 unsafe { const { mem::zeroed() } }
16206}
16207
16208/// Return vector of type `__m512` with all elements set to zero.
16209///
16210/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero&expand=5014)
16211#[inline]
16212#[target_feature(enable = "avx512f")]
16213#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16214#[cfg_attr(test, assert_instr(vxorps))]
16215pub fn _mm512_setzero() -> __m512 {
16216 // All-0 is a properly initialized __m512
16217 unsafe { const { mem::zeroed() } }
16218}
16219
16220/// Returns vector of type `__m512i` with all elements set to zero.
16221///
16222/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_si512&expand=5024)
16223#[inline]
16224#[target_feature(enable = "avx512f")]
16225#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16226#[cfg_attr(test, assert_instr(vxorps))]
16227pub fn _mm512_setzero_si512() -> __m512i {
16228 // All-0 is a properly initialized __m512i
16229 unsafe { const { mem::zeroed() } }
16230}
16231
16232/// Return vector of type `__m512i` with all elements set to zero.
16233///
16234/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_epi32&expand=5015)
16235#[inline]
16236#[target_feature(enable = "avx512f")]
16237#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16238#[cfg_attr(test, assert_instr(vxorps))]
16239pub fn _mm512_setzero_epi32() -> __m512i {
16240 // All-0 is a properly initialized __m512i
16241 unsafe { const { mem::zeroed() } }
16242}
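
// Illustrative sketch, not part of the upstream API (the helper name is an
// assumption): the `_mm512_setzero*` intrinsics differ only in the vector type
// they return; each produces an all-zero 512-bit value and is expected to
// compile down to a single register-clearing `vxorps`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_setzero_variants() -> (__m512, __m512d, __m512i) {
    (_mm512_setzero_ps(), _mm512_setzero_pd(), _mm512_setzero_si512())
}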
16243
16244/// Sets packed 32-bit integers in `dst` with the supplied values in reverse
16245/// order.
16246///
16247/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_epi32&expand=4991)
16248#[inline]
16249#[target_feature(enable = "avx512f")]
16250#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16251pub fn _mm512_setr_epi32(
16252 e15: i32,
16253 e14: i32,
16254 e13: i32,
16255 e12: i32,
16256 e11: i32,
16257 e10: i32,
16258 e9: i32,
16259 e8: i32,
16260 e7: i32,
16261 e6: i32,
16262 e5: i32,
16263 e4: i32,
16264 e3: i32,
16265 e2: i32,
16266 e1: i32,
16267 e0: i32,
16268) -> __m512i {
16269 unsafe {
16270 let r: i32x16 = i32x16::new(
            e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0,
        );
        transmute(r)
16274 }
16275}
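
// Illustrative sketch, not part of the upstream API (the helper name is an
// assumption): `_mm512_setr_epi32` takes its arguments in memory order (the
// first argument becomes element 0), the reverse of `_mm512_set_epi32`, so the
// two calls below build the same vector.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_set_vs_setr_epi32() -> (__m512i, __m512i) {
    let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    let b = _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
    (a, b) // element i of both vectors is i
}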
16276
16277/// Set packed 8-bit integers in dst with the supplied values.
16278///
16279/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi8&expand=4915)
16280#[inline]
16281#[target_feature(enable = "avx512f")]
16282#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16283pub fn _mm512_set_epi8(
16284 e63: i8,
16285 e62: i8,
16286 e61: i8,
16287 e60: i8,
16288 e59: i8,
16289 e58: i8,
16290 e57: i8,
16291 e56: i8,
16292 e55: i8,
16293 e54: i8,
16294 e53: i8,
16295 e52: i8,
16296 e51: i8,
16297 e50: i8,
16298 e49: i8,
16299 e48: i8,
16300 e47: i8,
16301 e46: i8,
16302 e45: i8,
16303 e44: i8,
16304 e43: i8,
16305 e42: i8,
16306 e41: i8,
16307 e40: i8,
16308 e39: i8,
16309 e38: i8,
16310 e37: i8,
16311 e36: i8,
16312 e35: i8,
16313 e34: i8,
16314 e33: i8,
16315 e32: i8,
16316 e31: i8,
16317 e30: i8,
16318 e29: i8,
16319 e28: i8,
16320 e27: i8,
16321 e26: i8,
16322 e25: i8,
16323 e24: i8,
16324 e23: i8,
16325 e22: i8,
16326 e21: i8,
16327 e20: i8,
16328 e19: i8,
16329 e18: i8,
16330 e17: i8,
16331 e16: i8,
16332 e15: i8,
16333 e14: i8,
16334 e13: i8,
16335 e12: i8,
16336 e11: i8,
16337 e10: i8,
16338 e9: i8,
16339 e8: i8,
16340 e7: i8,
16341 e6: i8,
16342 e5: i8,
16343 e4: i8,
16344 e3: i8,
16345 e2: i8,
16346 e1: i8,
16347 e0: i8,
16348) -> __m512i {
16349 unsafe {
16350 let r: i8x64 = i8x64::new(
            e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18,
            e19, e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31, e32, e33, e34, e35,
            e36, e37, e38, e39, e40, e41, e42, e43, e44, e45, e46, e47, e48, e49, e50, e51, e52,
            e53, e54, e55, e56, e57, e58, e59, e60, e61, e62, e63,
        );
        transmute(r)
16357 }
16358}
16359
16360/// Set packed 16-bit integers in dst with the supplied values.
16361///
16362/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi16&expand=4905)
16363#[inline]
16364#[target_feature(enable = "avx512f")]
16365#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16366pub fn _mm512_set_epi16(
16367 e31: i16,
16368 e30: i16,
16369 e29: i16,
16370 e28: i16,
16371 e27: i16,
16372 e26: i16,
16373 e25: i16,
16374 e24: i16,
16375 e23: i16,
16376 e22: i16,
16377 e21: i16,
16378 e20: i16,
16379 e19: i16,
16380 e18: i16,
16381 e17: i16,
16382 e16: i16,
16383 e15: i16,
16384 e14: i16,
16385 e13: i16,
16386 e12: i16,
16387 e11: i16,
16388 e10: i16,
16389 e9: i16,
16390 e8: i16,
16391 e7: i16,
16392 e6: i16,
16393 e5: i16,
16394 e4: i16,
16395 e3: i16,
16396 e2: i16,
16397 e1: i16,
16398 e0: i16,
16399) -> __m512i {
16400 unsafe {
16401 let r: i16x32 = i16x32::new(
            e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18,
            e19, e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31,
        );
        transmute(r)
16406 }
16407}
16408
16409/// Set packed 32-bit integers in dst with the repeated 4 element sequence.
16410///
16411/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_epi32&expand=4982)
16412#[inline]
16413#[target_feature(enable = "avx512f")]
16414#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16415pub fn _mm512_set4_epi32(d: i32, c: i32, b: i32, a: i32) -> __m512i {
    _mm512_set_epi32(d, c, b, a, d, c, b, a, d, c, b, a, d, c, b, a)
16417}
16418
16419/// Set packed single-precision (32-bit) floating-point elements in dst with the repeated 4 element sequence.
16420///
16421/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_ps&expand=4985)
16422#[inline]
16423#[target_feature(enable = "avx512f")]
16424#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16425pub fn _mm512_set4_ps(d: f32, c: f32, b: f32, a: f32) -> __m512 {
    _mm512_set_ps(d, c, b, a, d, c, b, a, d, c, b, a, d, c, b, a)
16427}
16428
16429/// Set packed double-precision (64-bit) floating-point elements in dst with the repeated 4 element sequence.
16430///
16431/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_pd&expand=4984)
16432#[inline]
16433#[target_feature(enable = "avx512f")]
16434#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16435pub fn _mm512_set4_pd(d: f64, c: f64, b: f64, a: f64) -> __m512d {
    _mm512_set_pd(d, c, b, a, d, c, b, a)
16437}
16438
16439/// Set packed 32-bit integers in dst with the repeated 4 element sequence in reverse order.
16440///
16441/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_epi32&expand=5009)
16442#[inline]
16443#[target_feature(enable = "avx512f")]
16444#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16445pub fn _mm512_setr4_epi32(d: i32, c: i32, b: i32, a: i32) -> __m512i {
    _mm512_set_epi32(a, b, c, d, a, b, c, d, a, b, c, d, a, b, c, d)
16447}
16448
16449/// Set packed single-precision (32-bit) floating-point elements in dst with the repeated 4 element sequence in reverse order.
16450///
16451/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_ps&expand=5012)
16452#[inline]
16453#[target_feature(enable = "avx512f")]
16454#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16455pub fn _mm512_setr4_ps(d: f32, c: f32, b: f32, a: f32) -> __m512 {
    _mm512_set_ps(a, b, c, d, a, b, c, d, a, b, c, d, a, b, c, d)
16457}
16458
16459/// Set packed double-precision (64-bit) floating-point elements in dst with the repeated 4 element sequence in reverse order.
16460///
16461/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_pd&expand=5011)
16462#[inline]
16463#[target_feature(enable = "avx512f")]
16464#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16465pub fn _mm512_setr4_pd(d: f64, c: f64, b: f64, a: f64) -> __m512d {
    _mm512_set_pd(a, b, c, d, a, b, c, d)
16467}
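
// Illustrative sketch, not part of the upstream API (the helper name is an
// assumption): `_mm512_set4_epi32(d, c, b, a)` places `a` in the lowest
// element of each group of four, while `_mm512_setr4_epi32(d, c, b, a)` places
// `d` there, so the two calls below produce mirrored four-element patterns.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_set4_vs_setr4() -> (__m512i, __m512i) {
    // Elements 0..3 are 1, 2, 3, 4 and the pattern repeats four times.
    let fwd = _mm512_setr4_epi32(1, 2, 3, 4);
    // Elements 0..3 are 4, 3, 2, 1 and the pattern repeats four times.
    let rev = _mm512_set4_epi32(1, 2, 3, 4);
    (fwd, rev)
}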
16468
16469/// Set packed 64-bit integers in dst with the supplied values.
16470///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi64&expand=4910)
16472#[inline]
16473#[target_feature(enable = "avx512f")]
16474#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16475pub fn _mm512_set_epi64(
16476 e0: i64,
16477 e1: i64,
16478 e2: i64,
16479 e3: i64,
16480 e4: i64,
16481 e5: i64,
16482 e6: i64,
16483 e7: i64,
16484) -> __m512i {
    _mm512_setr_epi64(e7, e6, e5, e4, e3, e2, e1, e0)
16486}
16487
16488/// Set packed 64-bit integers in dst with the supplied values in reverse order.
16489///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_epi64&expand=4993)
16491#[inline]
16492#[target_feature(enable = "avx512f")]
16493#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16494pub fn _mm512_setr_epi64(
16495 e0: i64,
16496 e1: i64,
16497 e2: i64,
16498 e3: i64,
16499 e4: i64,
16500 e5: i64,
16501 e6: i64,
16502 e7: i64,
16503) -> __m512i {
16504 unsafe {
        let r: i64x8 = i64x8::new(e0, e1, e2, e3, e4, e5, e6, e7);
        transmute(r)
16507 }
16508}
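
// Illustrative sketch, not part of the upstream API (the helper name is an
// assumption): `_mm512_set_epi64` takes the highest element first and simply
// forwards the reversed argument list to `_mm512_setr_epi64`, which takes
// element 0 first, so the two calls below build the same vector.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_set_vs_setr_epi64() -> (__m512i, __m512i) {
    let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
    let b = _mm512_set_epi64(7, 6, 5, 4, 3, 2, 1, 0);
    (a, b) // element i of both vectors is i
}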
16509
16510/// Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16511///
16512/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_pd&expand=3002)
16513#[inline]
16514#[target_feature(enable = "avx512f")]
16515#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16516#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
16517#[rustc_legacy_const_generics(2)]
16518pub unsafe fn _mm512_i32gather_pd<const SCALE: i32>(offsets: __m256i, slice: *const u8) -> __m512d {
16519 static_assert_imm8_scale!(SCALE);
16520 let zero: f64x8 = f64x8::ZERO;
16521 let neg_one: i8 = -1;
16522 let slice: *const i8 = slice as *const i8;
16523 let offsets: i32x8 = offsets.as_i32x8();
    let r: f64x8 = vgatherdpd(zero, slice, offsets, neg_one, SCALE);
    transmute(r)
16526}
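
// Illustrative sketch, not part of the upstream API (the helper name is an
// assumption): SCALE is a byte multiplier applied to each index, so gathering
// f64 values addressed by element index uses SCALE = 8, the size of one f64.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_i32gather_pd(table: &[f64; 16]) -> __m512d {
    // Gather every second element: table[0], table[2], ..., table[14].
    let idx = _mm256_setr_epi32(0, 2, 4, 6, 8, 10, 12, 14);
    _mm512_i32gather_pd::<8>(idx, table.as_ptr() as *const u8)
}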
16527
16528/// Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16529///
16530/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_pd&expand=3003)
16531#[inline]
16532#[target_feature(enable = "avx512f")]
16533#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16534#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
16535#[rustc_legacy_const_generics(4)]
16536pub unsafe fn _mm512_mask_i32gather_pd<const SCALE: i32>(
16537 src: __m512d,
16538 mask: __mmask8,
16539 offsets: __m256i,
16540 slice: *const u8,
16541) -> __m512d {
16542 static_assert_imm8_scale!(SCALE);
16543 let src: f64x8 = src.as_f64x8();
16544 let slice: *const i8 = slice as *const i8;
16545 let offsets: i32x8 = offsets.as_i32x8();
16546 let r: f64x8 = vgatherdpd(src, slice, offsets, mask as i8, SCALE);
    transmute(r)
16548}
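
// Illustrative sketch, not part of the upstream API (the helper name is an
// assumption): in the masked gather, lanes whose mask bit is clear are not
// gathered; they are copied from `src`, so a sentinel such as NaN can mark the
// lanes that were left untouched.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_mask_i32gather_pd(table: &[f64; 8]) -> __m512d {
    let idx = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
    let fallback = _mm512_set1_pd(f64::NAN);
    // Only the low four lanes are gathered; the high four keep NaN.
    _mm512_mask_i32gather_pd::<8>(fallback, 0b0000_1111, idx, table.as_ptr() as *const u8)
}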
16549
16550/// Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16551///
16552/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_pd&expand=3092)
16553#[inline]
16554#[target_feature(enable = "avx512f")]
16555#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16556#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
16557#[rustc_legacy_const_generics(2)]
16558pub unsafe fn _mm512_i64gather_pd<const SCALE: i32>(offsets: __m512i, slice: *const u8) -> __m512d {
16559 static_assert_imm8_scale!(SCALE);
16560 let zero: f64x8 = f64x8::ZERO;
16561 let neg_one: i8 = -1;
16562 let slice: *const i8 = slice as *const i8;
16563 let offsets: i64x8 = offsets.as_i64x8();
    let r: f64x8 = vgatherqpd(zero, slice, offsets, neg_one, SCALE);
    transmute(r)
16566}
16567
16568/// Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16569///
16570/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_pd&expand=3093)
16571#[inline]
16572#[target_feature(enable = "avx512f")]
16573#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16574#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
16575#[rustc_legacy_const_generics(4)]
16576pub unsafe fn _mm512_mask_i64gather_pd<const SCALE: i32>(
16577 src: __m512d,
16578 mask: __mmask8,
16579 offsets: __m512i,
16580 slice: *const u8,
16581) -> __m512d {
16582 static_assert_imm8_scale!(SCALE);
16583 let src: f64x8 = src.as_f64x8();
16584 let slice: *const i8 = slice as *const i8;
16585 let offsets: i64x8 = offsets.as_i64x8();
16586 let r: f64x8 = vgatherqpd(src, slice, offsets, mask as i8, SCALE);
    transmute(r)
16588}
16589
16590/// Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16591///
16592/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_ps&expand=3100)
16593#[inline]
16594#[target_feature(enable = "avx512f")]
16595#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16596#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
16597#[rustc_legacy_const_generics(2)]
16598pub unsafe fn _mm512_i64gather_ps<const SCALE: i32>(offsets: __m512i, slice: *const u8) -> __m256 {
16599 static_assert_imm8_scale!(SCALE);
16600 let zero: f32x8 = f32x8::ZERO;
16601 let neg_one: i8 = -1;
16602 let slice: *const i8 = slice as *const i8;
16603 let offsets: i64x8 = offsets.as_i64x8();
    let r: f32x8 = vgatherqps(zero, slice, offsets, neg_one, SCALE);
    transmute(r)
16606}
16607
16608/// Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16609///
16610/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_ps&expand=3101)
16611#[inline]
16612#[target_feature(enable = "avx512f")]
16613#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16614#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
16615#[rustc_legacy_const_generics(4)]
16616pub unsafe fn _mm512_mask_i64gather_ps<const SCALE: i32>(
16617 src: __m256,
16618 mask: __mmask8,
16619 offsets: __m512i,
16620 slice: *const u8,
16621) -> __m256 {
16622 static_assert_imm8_scale!(SCALE);
16623 let src: f32x8 = src.as_f32x8();
16624 let slice: *const i8 = slice as *const i8;
16625 let offsets: i64x8 = offsets.as_i64x8();
16626 let r: f32x8 = vgatherqps(src, slice, offsets, mask as i8, SCALE);
    transmute(r)
16628}
16629
16630/// Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16631///
16632/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_ps&expand=3010)
16633#[inline]
16634#[target_feature(enable = "avx512f")]
16635#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16636#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
16637#[rustc_legacy_const_generics(2)]
16638pub unsafe fn _mm512_i32gather_ps<const SCALE: i32>(offsets: __m512i, slice: *const u8) -> __m512 {
16639 static_assert_imm8_scale!(SCALE);
16640 let zero: f32x16 = f32x16::ZERO;
16641 let neg_one: i16 = -1;
16642 let slice: *const i8 = slice as *const i8;
16643 let offsets: i32x16 = offsets.as_i32x16();
    let r: f32x16 = vgatherdps(zero, slice, offsets, neg_one, SCALE);
    transmute(r)
16646}
16647
16648/// Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16649///
16650/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_ps&expand=3011)
16651#[inline]
16652#[target_feature(enable = "avx512f")]
16653#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16654#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
16655#[rustc_legacy_const_generics(4)]
16656pub unsafe fn _mm512_mask_i32gather_ps<const SCALE: i32>(
16657 src: __m512,
16658 mask: __mmask16,
16659 offsets: __m512i,
16660 slice: *const u8,
16661) -> __m512 {
16662 static_assert_imm8_scale!(SCALE);
16663 let src: f32x16 = src.as_f32x16();
16664 let slice: *const i8 = slice as *const i8;
16665 let offsets: i32x16 = offsets.as_i32x16();
16666 let r: f32x16 = vgatherdps(src, slice, offsets, mask as i16, SCALE);
    transmute(r)
16668}
16669
16670/// Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16671///
16672/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_epi32&expand=2986)
16673#[inline]
16674#[target_feature(enable = "avx512f")]
16675#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16676#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
16677#[rustc_legacy_const_generics(2)]
16678pub unsafe fn _mm512_i32gather_epi32<const SCALE: i32>(
16679 offsets: __m512i,
16680 slice: *const u8,
16681) -> __m512i {
16682 static_assert_imm8_scale!(SCALE);
16683 let zero: i32x16 = i32x16::ZERO;
16684 let neg_one: i16 = -1;
16685 let slice: *const i8 = slice as *const i8;
16686 let offsets: i32x16 = offsets.as_i32x16();
    let r: i32x16 = vpgatherdd(zero, slice, offsets, neg_one, SCALE);
    transmute(r)
16689}
16690
16691/// Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16692///
16693/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_epi32&expand=2987)
16694#[inline]
16695#[target_feature(enable = "avx512f")]
16696#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16697#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
16698#[rustc_legacy_const_generics(4)]
16699pub unsafe fn _mm512_mask_i32gather_epi32<const SCALE: i32>(
16700 src: __m512i,
16701 mask: __mmask16,
16702 offsets: __m512i,
16703 slice: *const u8,
16704) -> __m512i {
16705 static_assert_imm8_scale!(SCALE);
16706 let src: i32x16 = src.as_i32x16();
16707 let mask: i16 = mask as i16;
16708 let slice: *const i8 = slice as *const i8;
16709 let offsets: i32x16 = offsets.as_i32x16();
16710 let r: i32x16 = vpgatherdd(src, slice, offsets, mask, SCALE);
    transmute(r)
16712}
16713
16714/// Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16715///
16716/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_epi64&expand=2994)
16717#[inline]
16718#[target_feature(enable = "avx512f")]
16719#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16720#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
16721#[rustc_legacy_const_generics(2)]
16722pub unsafe fn _mm512_i32gather_epi64<const SCALE: i32>(
16723 offsets: __m256i,
16724 slice: *const u8,
16725) -> __m512i {
16726 static_assert_imm8_scale!(SCALE);
16727 let zero: i64x8 = i64x8::ZERO;
16728 let neg_one: i8 = -1;
16729 let slice: *const i8 = slice as *const i8;
16730 let offsets: i32x8 = offsets.as_i32x8();
    let r: i64x8 = vpgatherdq(zero, slice, offsets, neg_one, SCALE);
    transmute(r)
16733}
16734
16735/// Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16736///
16737/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_epi64&expand=2995)
16738#[inline]
16739#[target_feature(enable = "avx512f")]
16740#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16741#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
16742#[rustc_legacy_const_generics(4)]
16743pub unsafe fn _mm512_mask_i32gather_epi64<const SCALE: i32>(
16744 src: __m512i,
16745 mask: __mmask8,
16746 offsets: __m256i,
16747 slice: *const u8,
16748) -> __m512i {
16749 static_assert_imm8_scale!(SCALE);
16750 let src: i64x8 = src.as_i64x8();
16751 let mask: i8 = mask as i8;
16752 let slice: *const i8 = slice as *const i8;
16753 let offsets: i32x8 = offsets.as_i32x8();
16754 let r: i64x8 = vpgatherdq(src, slice, offsets, mask, SCALE);
    transmute(r)
16756}
16757
16758/// Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16759///
16760/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_epi64&expand=3084)
16761#[inline]
16762#[target_feature(enable = "avx512f")]
16763#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16764#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
16765#[rustc_legacy_const_generics(2)]
16766pub unsafe fn _mm512_i64gather_epi64<const SCALE: i32>(
16767 offsets: __m512i,
16768 slice: *const u8,
16769) -> __m512i {
16770 static_assert_imm8_scale!(SCALE);
16771 let zero: i64x8 = i64x8::ZERO;
16772 let neg_one: i8 = -1;
16773 let slice: *const i8 = slice as *const i8;
16774 let offsets: i64x8 = offsets.as_i64x8();
    let r: i64x8 = vpgatherqq(zero, slice, offsets, neg_one, SCALE);
    transmute(r)
16777}
16778
16779/// Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16780///
16781/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_epi64&expand=3085)
16782#[inline]
16783#[target_feature(enable = "avx512f")]
16784#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16785#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
16786#[rustc_legacy_const_generics(4)]
16787pub unsafe fn _mm512_mask_i64gather_epi64<const SCALE: i32>(
16788 src: __m512i,
16789 mask: __mmask8,
16790 offsets: __m512i,
16791 slice: *const u8,
16792) -> __m512i {
16793 static_assert_imm8_scale!(SCALE);
16794 let src: i64x8 = src.as_i64x8();
16795 let mask: i8 = mask as i8;
16796 let slice: *const i8 = slice as *const i8;
16797 let offsets: i64x8 = offsets.as_i64x8();
16798 let r: i64x8 = vpgatherqq(src, slice, offsets, mask, SCALE);
    transmute(r)
16800}
16801
16802/// Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16803///
16804/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_epi32&expand=3074)
16805#[inline]
16806#[target_feature(enable = "avx512f")]
16807#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16808#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
16809#[rustc_legacy_const_generics(2)]
16810pub unsafe fn _mm512_i64gather_epi32<const SCALE: i32>(
16811 offsets: __m512i,
16812 slice: *const u8,
16813) -> __m256i {
16814 static_assert_imm8_scale!(SCALE);
16815 let zeros: i32x8 = i32x8::ZERO;
16816 let neg_one: i8 = -1;
16817 let slice: *const i8 = slice as *const i8;
16818 let offsets: i64x8 = offsets.as_i64x8();
    let r: i32x8 = vpgatherqd(zeros, slice, offsets, neg_one, SCALE);
    transmute(r)
16821}
16822
16823/// Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16824///
16825/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_epi32&expand=3075)
16826#[inline]
16827#[target_feature(enable = "avx512f")]
16828#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16829#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
16830#[rustc_legacy_const_generics(4)]
16831pub unsafe fn _mm512_mask_i64gather_epi32<const SCALE: i32>(
16832 src: __m256i,
16833 mask: __mmask8,
16834 offsets: __m512i,
16835 slice: *const u8,
16836) -> __m256i {
16837 static_assert_imm8_scale!(SCALE);
16838 let src: i32x8 = src.as_i32x8();
16839 let mask: i8 = mask as i8;
16840 let slice: *const i8 = slice as *const i8;
16841 let offsets: i64x8 = offsets.as_i64x8();
16842 let r: i32x8 = vpgatherqd(src, slice, offsets, mask, SCALE);
    transmute(r)
16844}
16845
16846/// Scatter double-precision (64-bit) floating-point elements from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
16847///
16848/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_pd&expand=3044)
16849#[inline]
16850#[target_feature(enable = "avx512f")]
16851#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16852#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
16853#[rustc_legacy_const_generics(3)]
16854pub unsafe fn _mm512_i32scatter_pd<const SCALE: i32>(
16855 slice: *mut u8,
16856 offsets: __m256i,
16857 src: __m512d,
16858) {
16859 static_assert_imm8_scale!(SCALE);
16860 let src: f64x8 = src.as_f64x8();
16861 let neg_one: i8 = -1;
16862 let slice: *mut i8 = slice as *mut i8;
16863 let offsets: i32x8 = offsets.as_i32x8();
    vscatterdpd(slice, neg_one, offsets, src, SCALE);
16865}
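
// Illustrative sketch, not part of the upstream API (the helper name is an
// assumption): scatter is the store-side counterpart of gather; each f64 lane
// of `src` is written to `slice + index * SCALE` bytes.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_i32scatter_pd(table: &mut [f64; 16], src: __m512d) {
    // Write the eight lanes of `src` to table[0], table[2], ..., table[14].
    let idx = _mm256_setr_epi32(0, 2, 4, 6, 8, 10, 12, 14);
    _mm512_i32scatter_pd::<8>(table.as_mut_ptr() as *mut u8, idx, src);
}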
16866
16867/// Scatter double-precision (64-bit) floating-point elements from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16868///
16869/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_pd&expand=3045)
16870#[inline]
16871#[target_feature(enable = "avx512f")]
16872#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16873#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
16874#[rustc_legacy_const_generics(4)]
16875pub unsafe fn _mm512_mask_i32scatter_pd<const SCALE: i32>(
16876 slice: *mut u8,
16877 mask: __mmask8,
16878 offsets: __m256i,
16879 src: __m512d,
16880) {
16881 static_assert_imm8_scale!(SCALE);
16882 let src: f64x8 = src.as_f64x8();
16883 let slice: *mut i8 = slice as *mut i8;
16884 let offsets: i32x8 = offsets.as_i32x8();
16885 vscatterdpd(slice, mask as i8, offsets, src, SCALE);
16886}
16887
16888/// Scatter double-precision (64-bit) floating-point elements from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
16889///
16890/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_pd&expand=3122)
16891#[inline]
16892#[target_feature(enable = "avx512f")]
16893#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16894#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
16895#[rustc_legacy_const_generics(3)]
16896pub unsafe fn _mm512_i64scatter_pd<const SCALE: i32>(
16897 slice: *mut u8,
16898 offsets: __m512i,
16899 src: __m512d,
16900) {
16901 static_assert_imm8_scale!(SCALE);
16902 let src: f64x8 = src.as_f64x8();
16903 let neg_one: i8 = -1;
16904 let slice: *mut i8 = slice as *mut i8;
16905 let offsets: i64x8 = offsets.as_i64x8();
    vscatterqpd(slice, neg_one, offsets, src, SCALE);
16907}
16908
16909/// Scatter double-precision (64-bit) floating-point elements from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16910///
16911/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_pd&expand=3123)
16912#[inline]
16913#[target_feature(enable = "avx512f")]
16914#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16915#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
16916#[rustc_legacy_const_generics(4)]
16917pub unsafe fn _mm512_mask_i64scatter_pd<const SCALE: i32>(
16918 slice: *mut u8,
16919 mask: __mmask8,
16920 offsets: __m512i,
16921 src: __m512d,
16922) {
16923 static_assert_imm8_scale!(SCALE);
16924 let src: f64x8 = src.as_f64x8();
16925 let slice: *mut i8 = slice as *mut i8;
16926 let offsets: i64x8 = offsets.as_i64x8();
16927 vscatterqpd(slice, mask as i8, offsets, src, SCALE);
16928}
16929
16930/// Scatter single-precision (32-bit) floating-point elements from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
16931///
16932/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_ps&expand=3050)
16933#[inline]
16934#[target_feature(enable = "avx512f")]
16935#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16936#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
16937#[rustc_legacy_const_generics(3)]
16938pub unsafe fn _mm512_i32scatter_ps<const SCALE: i32>(
16939 slice: *mut u8,
16940 offsets: __m512i,
16941 src: __m512,
16942) {
16943 static_assert_imm8_scale!(SCALE);
16944 let src: f32x16 = src.as_f32x16();
16945 let neg_one: i16 = -1;
16946 let slice: *mut i8 = slice as *mut i8;
16947 let offsets: i32x16 = offsets.as_i32x16();
    vscatterdps(slice, neg_one, offsets, src, SCALE);
16949}
16950
16951/// Scatter single-precision (32-bit) floating-point elements from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16952///
16953/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_ps&expand=3051)
16954#[inline]
16955#[target_feature(enable = "avx512f")]
16956#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16957#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
16958#[rustc_legacy_const_generics(4)]
16959pub unsafe fn _mm512_mask_i32scatter_ps<const SCALE: i32>(
16960 slice: *mut u8,
16961 mask: __mmask16,
16962 offsets: __m512i,
16963 src: __m512,
16964) {
16965 static_assert_imm8_scale!(SCALE);
16966 let src: f32x16 = src.as_f32x16();
16967 let slice: *mut i8 = slice as *mut i8;
16968 let offsets: i32x16 = offsets.as_i32x16();
16969 vscatterdps(slice, mask as i16, offsets, src, SCALE);
16970}
16971
/// Scatter single-precision (32-bit) floating-point elements from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
16973///
16974/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_ps&expand=3128)
16975#[inline]
16976#[target_feature(enable = "avx512f")]
16977#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16978#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
16979#[rustc_legacy_const_generics(3)]
16980pub unsafe fn _mm512_i64scatter_ps<const SCALE: i32>(
16981 slice: *mut u8,
16982 offsets: __m512i,
16983 src: __m256,
16984) {
16985 static_assert_imm8_scale!(SCALE);
16986 let src: f32x8 = src.as_f32x8();
16987 let neg_one: i8 = -1;
16988 let slice: *mut i8 = slice as *mut i8;
16989 let offsets: i64x8 = offsets.as_i64x8();
    vscatterqps(slice, neg_one, offsets, src, SCALE);
16991}
16992
16993/// Scatter single-precision (32-bit) floating-point elements from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16994///
16995/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_ps&expand=3129)
16996#[inline]
16997#[target_feature(enable = "avx512f")]
16998#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16999#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
17000#[rustc_legacy_const_generics(4)]
17001pub unsafe fn _mm512_mask_i64scatter_ps<const SCALE: i32>(
17002 slice: *mut u8,
17003 mask: __mmask8,
17004 offsets: __m512i,
17005 src: __m256,
17006) {
17007 static_assert_imm8_scale!(SCALE);
17008 let src: f32x8 = src.as_f32x8();
17009 let slice: *mut i8 = slice as *mut i8;
17010 let offsets: i64x8 = offsets.as_i64x8();
17011 vscatterqps(slice, mask as i8, offsets, src, SCALE);
17012}
17013
17014/// Scatter 64-bit integers from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17015///
17016/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_epi64&expand=3038)
17017#[inline]
17018#[target_feature(enable = "avx512f")]
17019#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17020#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17021#[rustc_legacy_const_generics(3)]
17022pub unsafe fn _mm512_i32scatter_epi64<const SCALE: i32>(
17023 slice: *mut u8,
17024 offsets: __m256i,
17025 src: __m512i,
17026) {
17027 static_assert_imm8_scale!(SCALE);
17028 let src: i64x8 = src.as_i64x8();
17029 let neg_one: i8 = -1;
17030 let slice: *mut i8 = slice as *mut i8;
17031 let offsets: i32x8 = offsets.as_i32x8();
    vpscatterdq(slice, neg_one, offsets, src, SCALE);
17033}
17034
17035/// Scatter 64-bit integers from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17036///
17037/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_epi64&expand=3039)
17038#[inline]
17039#[target_feature(enable = "avx512f")]
17040#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17041#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17042#[rustc_legacy_const_generics(4)]
17043pub unsafe fn _mm512_mask_i32scatter_epi64<const SCALE: i32>(
17044 slice: *mut u8,
17045 mask: __mmask8,
17046 offsets: __m256i,
17047 src: __m512i,
17048) {
17049 static_assert_imm8_scale!(SCALE);
17050 let src: i64x8 = src.as_i64x8();
17051 let mask: i8 = mask as i8;
17052 let slice: *mut i8 = slice as *mut i8;
17053 let offsets: i32x8 = offsets.as_i32x8();
17054 vpscatterdq(slice, mask, offsets, src, SCALE);
17055}
17056
17057/// Scatter 64-bit integers from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17058///
17059/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_epi64&expand=3116)
17060#[inline]
17061#[target_feature(enable = "avx512f")]
17062#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17063#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
17064#[rustc_legacy_const_generics(3)]
17065pub unsafe fn _mm512_i64scatter_epi64<const SCALE: i32>(
17066 slice: *mut u8,
17067 offsets: __m512i,
17068 src: __m512i,
17069) {
17070 static_assert_imm8_scale!(SCALE);
17071 let src: i64x8 = src.as_i64x8();
17072 let neg_one: i8 = -1;
17073 let slice: *mut i8 = slice as *mut i8;
17074 let offsets: i64x8 = offsets.as_i64x8();
    vpscatterqq(slice, neg_one, offsets, src, SCALE);
17076}
17077
17078/// Scatter 64-bit integers from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17079///
17080/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_epi64&expand=3117)
17081#[inline]
17082#[target_feature(enable = "avx512f")]
17083#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17084#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
17085#[rustc_legacy_const_generics(4)]
17086pub unsafe fn _mm512_mask_i64scatter_epi64<const SCALE: i32>(
17087 slice: *mut u8,
17088 mask: __mmask8,
17089 offsets: __m512i,
17090 src: __m512i,
17091) {
17092 static_assert_imm8_scale!(SCALE);
17093 let src: i64x8 = src.as_i64x8();
17094 let mask: i8 = mask as i8;
17095 let slice: *mut i8 = slice as *mut i8;
17096 let offsets: i64x8 = offsets.as_i64x8();
17097 vpscatterqq(slice, mask, offsets, src, SCALE);
17098}
17099
17100/// Scatter 32-bit integers from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17101///
17102/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_epi32&expand=3032)
17103#[inline]
17104#[target_feature(enable = "avx512f")]
17105#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17106#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
17107#[rustc_legacy_const_generics(3)]
17108pub unsafe fn _mm512_i32scatter_epi32<const SCALE: i32>(
17109 slice: *mut u8,
17110 offsets: __m512i,
17111 src: __m512i,
17112) {
17113 static_assert_imm8_scale!(SCALE);
17114 let src: i32x16 = src.as_i32x16();
17115 let neg_one: i16 = -1;
17116 let slice: *mut i8 = slice as *mut i8;
17117 let offsets: i32x16 = offsets.as_i32x16();
    vpscatterdd(slice, neg_one, offsets, src, SCALE);
17119}
17120
17121/// Scatter 32-bit integers from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17122///
17123/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_epi32&expand=3033)
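///
/// # Example
///
/// A minimal sketch of the mask semantics (illustration only; marked `ignore`
/// because AVX-512F support cannot be assumed when doctests run):
///
/// ```ignore
/// let mut buf = [0i32; 16];
/// let vindex = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
/// let a = _mm512_set1_epi32(-1);
/// // Only lanes whose mask bit is set are written: here lanes 0 and 15.
/// let k: __mmask16 = 0b1000_0000_0000_0001;
/// unsafe { _mm512_mask_i32scatter_epi32::<4>(buf.as_mut_ptr().cast(), k, vindex, a) };
/// // buf[0] and buf[15] are now -1; every other element is still 0.
/// ```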
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_i32scatter_epi32<const SCALE: i32>(
    slice: *mut u8,
    mask: __mmask16,
    offsets: __m512i,
    src: __m512i,
) {
    static_assert_imm8_scale!(SCALE);
    let src: i32x16 = src.as_i32x16();
    let mask: i16 = mask as i16;
    let slice: *mut i8 = slice as *mut i8;
    let offsets: i32x16 = offsets.as_i32x16();
    vpscatterdd(slice, mask, offsets, src, SCALE);
}

/// Scatter 32-bit integers from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_epi32&expand=3108)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_i64scatter_epi32<const SCALE: i32>(
    slice: *mut u8,
    offsets: __m512i,
    src: __m256i,
) {
    static_assert_imm8_scale!(SCALE);
    let src: i32x8 = src.as_i32x8();
    let neg_one: i8 = -1;
    let slice: *mut i8 = slice as *mut i8;
    let offsets: i64x8 = offsets.as_i64x8();
    vpscatterqd(slice, neg_one, offsets, src, SCALE);
}

/// Scatter 32-bit integers from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_epi32&expand=3109)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_i64scatter_epi32<const SCALE: i32>(
    slice: *mut u8,
    mask: __mmask8,
    offsets: __m512i,
    src: __m256i,
) {
    static_assert_imm8_scale!(SCALE);
    let src: i32x8 = src.as_i32x8();
    let mask: i8 = mask as i8;
    let slice: *mut i8 = slice as *mut i8;
    let offsets: i64x8 = offsets.as_i64x8();
    vpscatterqd(slice, mask, offsets, src, SCALE);
}

/// Loads 8 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
/// indices stored in the lower half of vindex scaled by scale and stores them in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32logather_epi64)
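///
/// # Example
///
/// A minimal sketch showing that only the lower 256 bits of vindex (eight
/// 32-bit indices) are used (illustration only; marked `ignore` because
/// AVX-512F support cannot be assumed when doctests run):
///
/// ```ignore
/// let table = [100i64, 101, 102, 103, 104, 105, 106, 107];
/// // The upper eight 32-bit lanes of `vindex` are ignored by the gather.
/// let vindex = _mm512_setr_epi32(7, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0);
/// let r = unsafe { _mm512_i32logather_epi64::<8>(vindex, table.as_ptr().cast()) };
/// // r holds [107, 106, 105, 104, 103, 102, 101, 100].
/// ```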
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_i32logather_epi64<const SCALE: i32>(
    vindex: __m512i,
    base_addr: *const u8,
) -> __m512i {
    _mm512_i32gather_epi64::<SCALE>(_mm512_castsi512_si256(vindex), base_addr as _)
}

/// Loads 8 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
/// indices stored in the lower half of vindex scaled by scale and stores them in dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32logather_epi64)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_mask_i32logather_epi64<const SCALE: i32>(
    src: __m512i,
    k: __mmask8,
    vindex: __m512i,
    base_addr: *const u8,
) -> __m512i {
    _mm512_mask_i32gather_epi64::<SCALE>(src, k, _mm512_castsi512_si256(vindex), base_addr as _)
}

/// Loads 8 double-precision (64-bit) floating-point elements from memory starting at location base_addr
/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale and stores them in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32logather_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_i32logather_pd<const SCALE: i32>(
    vindex: __m512i,
    base_addr: *const u8,
) -> __m512d {
    _mm512_i32gather_pd::<SCALE>(_mm512_castsi512_si256(vindex), base_addr as _)
}

/// Loads 8 double-precision (64-bit) floating-point elements from memory starting at location base_addr
/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale and stores them in dst
/// using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32logather_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_mask_i32logather_pd<const SCALE: i32>(
    src: __m512d,
    k: __mmask8,
    vindex: __m512i,
    base_addr: *const u8,
) -> __m512d {
    _mm512_mask_i32gather_pd::<SCALE>(src, k, _mm512_castsi512_si256(vindex), base_addr as _)
}

/// Stores 8 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
/// indices stored in the lower half of vindex scaled by scale.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32loscatter_epi64)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_i32loscatter_epi64<const SCALE: i32>(
    base_addr: *mut u8,
    vindex: __m512i,
    a: __m512i,
) {
    _mm512_i32scatter_epi64::<SCALE>(base_addr as _, _mm512_castsi512_si256(vindex), a)
}

/// Stores 8 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
/// indices stored in the lower half of vindex scaled by scale using writemask k (elements whose corresponding
/// mask bit is not set are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32loscatter_epi64)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_mask_i32loscatter_epi64<const SCALE: i32>(
    base_addr: *mut u8,
    k: __mmask8,
    vindex: __m512i,
    a: __m512i,
) {
    _mm512_mask_i32scatter_epi64::<SCALE>(base_addr as _, k, _mm512_castsi512_si256(vindex), a)
}

/// Stores 8 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32loscatter_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_i32loscatter_pd<const SCALE: i32>(
    base_addr: *mut u8,
    vindex: __m512i,
    a: __m512d,
) {
    _mm512_i32scatter_pd::<SCALE>(base_addr as _, _mm512_castsi512_si256(vindex), a)
}

/// Stores 8 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale using writemask k
/// (elements whose corresponding mask bit is not set are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32loscatter_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_mask_i32loscatter_pd<const SCALE: i32>(
    base_addr: *mut u8,
    k: __mmask8,
    vindex: __m512i,
    a: __m512d,
) {
    _mm512_mask_i32scatter_pd::<SCALE>(base_addr as _, k, _mm512_castsi512_si256(vindex), a)
}

/// Stores 8 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
/// indices stored in vindex scaled by scale
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32scatter_epi32)
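///
/// # Example
///
/// A minimal usage sketch (illustration only; marked `ignore` because
/// AVX-512F/AVX-512VL support cannot be assumed when doctests run):
///
/// ```ignore
/// let mut buf = [0i32; 16];
/// // Scatter eight values to every other slot: byte offset = index * SCALE.
/// let vindex = _mm256_setr_epi32(0, 2, 4, 6, 8, 10, 12, 14);
/// let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
/// unsafe { _mm256_i32scatter_epi32::<4>(buf.as_mut_ptr().cast(), vindex, a) };
/// // buf now holds [1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0, 8, 0].
/// ```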
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_i32scatter_epi32<const SCALE: i32>(
    base_addr: *mut u8,
    vindex: __m256i,
    a: __m256i,
) {
    static_assert_imm8_scale!(SCALE);
    vpscatterdd_256(base_addr as _, 0xff, vindex.as_i32x8(), a.as_i32x8(), SCALE)
}

/// Stores 8 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
/// are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mask_i32scatter_epi32<const SCALE: i32>(
    base_addr: *mut u8,
    k: __mmask8,
    vindex: __m256i,
    a: __m256i,
) {
    static_assert_imm8_scale!(SCALE);
    vpscatterdd_256(base_addr as _, k, vindex.as_i32x8(), a.as_i32x8(), SCALE)
}

/// Scatter 64-bit integers from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i32scatter_epi64&expand=4099)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm256_i32scatter_epi64<const SCALE: i32>(
    slice: *mut u8,
    offsets: __m128i,
    src: __m256i,
) {
    static_assert_imm8_scale!(SCALE);
    let src: i64x4 = src.as_i64x4();
    let slice: *mut i8 = slice as *mut i8;
    let offsets: i32x4 = offsets.as_i32x4();
    vpscatterdq_256(slice, 0xff, offsets, src, SCALE);
}

/// Stores 4 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
/// are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mask_i32scatter_epi64<const SCALE: i32>(
    base_addr: *mut u8,
    k: __mmask8,
    vindex: __m128i,
    a: __m256i,
) {
    static_assert_imm8_scale!(SCALE);
    vpscatterdq_256(base_addr as _, k, vindex.as_i32x4(), a.as_i64x4(), SCALE)
}

/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 32-bit integer indices stored in vindex scaled by scale
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32scatter_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_i32scatter_pd<const SCALE: i32>(
    base_addr: *mut u8,
    vindex: __m128i,
    a: __m256d,
) {
    static_assert_imm8_scale!(SCALE);
    vscatterdpd_256(base_addr as _, 0xff, vindex.as_i32x4(), a.as_f64x4(), SCALE)
}

/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
/// mask bit is not set are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mask_i32scatter_pd<const SCALE: i32>(
    base_addr: *mut u8,
    k: __mmask8,
    vindex: __m128i,
    a: __m256d,
) {
    static_assert_imm8_scale!(SCALE);
    vscatterdpd_256(base_addr as _, k, vindex.as_i32x4(), a.as_f64x4(), SCALE)
}

/// Stores 8 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 32-bit integer indices stored in vindex scaled by scale
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32scatter_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_i32scatter_ps<const SCALE: i32>(
    base_addr: *mut u8,
    vindex: __m256i,
    a: __m256,
) {
    static_assert_imm8_scale!(SCALE);
    vscatterdps_256(base_addr as _, 0xff, vindex.as_i32x8(), a.as_f32x8(), SCALE)
}

/// Stores 8 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
/// mask bit is not set are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mask_i32scatter_ps<const SCALE: i32>(
    base_addr: *mut u8,
    k: __mmask8,
    vindex: __m256i,
    a: __m256,
) {
    static_assert_imm8_scale!(SCALE);
    vscatterdps_256(base_addr as _, k, vindex.as_i32x8(), a.as_f32x8(), SCALE)
}

/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
/// indices stored in vindex scaled by scale
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_i64scatter_epi32<const SCALE: i32>(
    base_addr: *mut u8,
    vindex: __m256i,
    a: __m128i,
) {
    static_assert_imm8_scale!(SCALE);
    vpscatterqd_256(base_addr as _, 0xff, vindex.as_i64x4(), a.as_i32x4(), SCALE)
}

/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
/// are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mask_i64scatter_epi32<const SCALE: i32>(
    base_addr: *mut u8,
    k: __mmask8,
    vindex: __m256i,
    a: __m128i,
) {
    static_assert_imm8_scale!(SCALE);
    vpscatterqd_256(base_addr as _, k, vindex.as_i64x4(), a.as_i32x4(), SCALE)
}

/// Stores 4 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
/// indices stored in vindex scaled by scale
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_i64scatter_epi64<const SCALE: i32>(
    base_addr: *mut u8,
    vindex: __m256i,
    a: __m256i,
) {
    static_assert_imm8_scale!(SCALE);
    vpscatterqq_256(base_addr as _, 0xff, vindex.as_i64x4(), a.as_i64x4(), SCALE)
}

/// Stores 4 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
/// are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mask_i64scatter_epi64<const SCALE: i32>(
    base_addr: *mut u8,
    k: __mmask8,
    vindex: __m256i,
    a: __m256i,
) {
    static_assert_imm8_scale!(SCALE);
    vpscatterqq_256(base_addr as _, k, vindex.as_i64x4(), a.as_i64x4(), SCALE)
}

/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 64-bit integer indices stored in vindex scaled by scale
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_i64scatter_pd<const SCALE: i32>(
    base_addr: *mut u8,
    vindex: __m256i,
    a: __m256d,
) {
    static_assert_imm8_scale!(SCALE);
    vscatterqpd_256(base_addr as _, 0xff, vindex.as_i64x4(), a.as_f64x4(), SCALE)
}

/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
/// mask bit is not set are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mask_i64scatter_pd<const SCALE: i32>(
    base_addr: *mut u8,
    k: __mmask8,
    vindex: __m256i,
    a: __m256d,
) {
    static_assert_imm8_scale!(SCALE);
    vscatterqpd_256(base_addr as _, k, vindex.as_i64x4(), a.as_f64x4(), SCALE)
}

/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 64-bit integer indices stored in vindex scaled by scale
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_i64scatter_ps<const SCALE: i32>(
    base_addr: *mut u8,
    vindex: __m256i,
    a: __m128,
) {
    static_assert_imm8_scale!(SCALE);
    vscatterqps_256(base_addr as _, 0xff, vindex.as_i64x4(), a.as_f32x4(), SCALE)
}

/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
/// mask bit is not set are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mask_i64scatter_ps<const SCALE: i32>(
    base_addr: *mut u8,
    k: __mmask8,
    vindex: __m256i,
    a: __m128,
) {
    static_assert_imm8_scale!(SCALE);
    vscatterqps_256(base_addr as _, k, vindex.as_i64x4(), a.as_f32x4(), SCALE)
}

/// Loads 8 32-bit integer elements from memory starting at location base_addr at packed 32-bit integer
/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
/// mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_epi32)
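///
/// # Example
///
/// A minimal sketch of the writemask fallback (illustration only; marked
/// `ignore` because AVX-512F/AVX-512VL support cannot be assumed when
/// doctests run):
///
/// ```ignore
/// let table = [10i32, 11, 12, 13, 14, 15, 16, 17];
/// let src = _mm256_set1_epi32(-1);
/// let vindex = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
/// // Only the low four mask bits are set, so lanes 4..8 keep the value from `src`.
/// let r = unsafe { _mm256_mmask_i32gather_epi32::<4>(src, 0b0000_1111, vindex, table.as_ptr().cast()) };
/// // r holds [10, 11, 12, 13, -1, -1, -1, -1].
/// ```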
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mmask_i32gather_epi32<const SCALE: i32>(
    src: __m256i,
    k: __mmask8,
    vindex: __m256i,
    base_addr: *const u8,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    transmute(vpgatherdd_256(
        src.as_i32x8(),
        base_addr as _,
        vindex.as_i32x8(),
        k,
        SCALE,
    ))
}

/// Loads 4 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
/// mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mmask_i32gather_epi64<const SCALE: i32>(
    src: __m256i,
    k: __mmask8,
    vindex: __m128i,
    base_addr: *const u8,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    transmute(vpgatherdq_256(
        src.as_i64x4(),
        base_addr as _,
        vindex.as_i32x4(),
        k,
        SCALE,
    ))
}

/// Loads 4 double-precision (64-bit) floating-point elements from memory starting at location base_addr
/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
/// from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mmask_i32gather_pd<const SCALE: i32>(
    src: __m256d,
    k: __mmask8,
    vindex: __m128i,
    base_addr: *const u8,
) -> __m256d {
    static_assert_imm8_scale!(SCALE);
    transmute(vgatherdpd_256(
        src.as_f64x4(),
        base_addr as _,
        vindex.as_i32x4(),
        k,
        SCALE,
    ))
}

/// Loads 8 single-precision (32-bit) floating-point elements from memory starting at location base_addr
/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
/// from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mmask_i32gather_ps<const SCALE: i32>(
    src: __m256,
    k: __mmask8,
    vindex: __m256i,
    base_addr: *const u8,
) -> __m256 {
    static_assert_imm8_scale!(SCALE);
    transmute(vgatherdps_256(
        src.as_f32x8(),
        base_addr as _,
        vindex.as_i32x8(),
        k,
        SCALE,
    ))
}

/// Loads 4 32-bit integer elements from memory starting at location base_addr at packed 64-bit integer
/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
/// mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mmask_i64gather_epi32<const SCALE: i32>(
    src: __m128i,
    k: __mmask8,
    vindex: __m256i,
    base_addr: *const u8,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    transmute(vpgatherqd_256(
        src.as_i32x4(),
        base_addr as _,
        vindex.as_i64x4(),
        k,
        SCALE,
    ))
}

/// Loads 4 64-bit integer elements from memory starting at location base_addr at packed 64-bit integer
/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
/// mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mmask_i64gather_epi64<const SCALE: i32>(
    src: __m256i,
    k: __mmask8,
    vindex: __m256i,
    base_addr: *const u8,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    transmute(vpgatherqq_256(
        src.as_i64x4(),
        base_addr as _,
        vindex.as_i64x4(),
        k,
        SCALE,
    ))
}

/// Loads 4 double-precision (64-bit) floating-point elements from memory starting at location base_addr
/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
/// from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_pd)
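///
/// # Example
///
/// A minimal usage sketch (illustration only; marked `ignore` because
/// AVX-512F/AVX-512VL support cannot be assumed when doctests run):
///
/// ```ignore
/// let table = [0.5f64, 1.5, 2.5, 3.5];
/// let src = _mm256_set1_pd(f64::NAN);
/// let vindex = _mm256_setr_epi64x(3, 2, 1, 0);
/// // All four mask bits are set, so every lane is gathered; with SCALE = 8 the
/// // byte offset of lane i is vindex[i] * 8.
/// let r = unsafe { _mm256_mmask_i64gather_pd::<8>(src, 0b1111, vindex, table.as_ptr().cast()) };
/// // r holds [3.5, 2.5, 1.5, 0.5].
/// ```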
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mmask_i64gather_pd<const SCALE: i32>(
    src: __m256d,
    k: __mmask8,
    vindex: __m256i,
    base_addr: *const u8,
) -> __m256d {
    static_assert_imm8_scale!(SCALE);
    transmute(vgatherqpd_256(
        src.as_f64x4(),
        base_addr as _,
        vindex.as_i64x4(),
        k,
        SCALE,
    ))
}

/// Loads 4 single-precision (32-bit) floating-point elements from memory starting at location base_addr
/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
/// from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mmask_i64gather_ps<const SCALE: i32>(
    src: __m128,
    k: __mmask8,
    vindex: __m256i,
    base_addr: *const u8,
) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    transmute(vgatherqps_256(
        src.as_f32x4(),
        base_addr as _,
        vindex.as_i64x4(),
        k,
        SCALE,
    ))
}

/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
/// indices stored in vindex scaled by scale
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_epi32)
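///
/// # Example
///
/// A minimal usage sketch (illustration only; marked `ignore` because
/// AVX-512F/AVX-512VL support cannot be assumed when doctests run):
///
/// ```ignore
/// let mut buf = [0i32; 8];
/// let vindex = _mm_setr_epi32(1, 3, 5, 7);
/// let a = _mm_setr_epi32(11, 33, 55, 77);
/// unsafe { _mm_i32scatter_epi32::<4>(buf.as_mut_ptr().cast(), vindex, a) };
/// // buf now holds [0, 11, 0, 33, 0, 55, 0, 77].
/// ```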
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_i32scatter_epi32<const SCALE: i32>(
    base_addr: *mut u8,
    vindex: __m128i,
    a: __m128i,
) {
    static_assert_imm8_scale!(SCALE);
    vpscatterdd_128(base_addr as _, 0xff, vindex.as_i32x4(), a.as_i32x4(), SCALE)
}

/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
/// are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mask_i32scatter_epi32<const SCALE: i32>(
    base_addr: *mut u8,
    k: __mmask8,
    vindex: __m128i,
    a: __m128i,
) {
    static_assert_imm8_scale!(SCALE);
    vpscatterdd_128(base_addr as _, k, vindex.as_i32x4(), a.as_i32x4(), SCALE)
}

/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
/// indices stored in vindex scaled by scale
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_i32scatter_epi64<const SCALE: i32>(
    base_addr: *mut u8,
    vindex: __m128i,
    a: __m128i,
) {
    static_assert_imm8_scale!(SCALE);
    vpscatterdq_128(base_addr as _, 0xff, vindex.as_i32x4(), a.as_i64x2(), SCALE)
}

/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
/// are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mask_i32scatter_epi64<const SCALE: i32>(
    base_addr: *mut u8,
    k: __mmask8,
    vindex: __m128i,
    a: __m128i,
) {
    static_assert_imm8_scale!(SCALE);
    vpscatterdq_128(base_addr as _, k, vindex.as_i32x4(), a.as_i64x2(), SCALE)
}

/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 32-bit integer indices stored in vindex scaled by scale
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_i32scatter_pd<const SCALE: i32>(base_addr: *mut u8, vindex: __m128i, a: __m128d) {
    static_assert_imm8_scale!(SCALE);
    vscatterdpd_128(base_addr as _, 0xff, vindex.as_i32x4(), a.as_f64x2(), SCALE)
}

/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
/// mask bit is not set are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mask_i32scatter_pd<const SCALE: i32>(
    base_addr: *mut u8,
    k: __mmask8,
    vindex: __m128i,
    a: __m128d,
) {
    static_assert_imm8_scale!(SCALE);
    vscatterdpd_128(base_addr as _, k, vindex.as_i32x4(), a.as_f64x2(), SCALE)
}

/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 32-bit integer indices stored in vindex scaled by scale
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_i32scatter_ps<const SCALE: i32>(base_addr: *mut u8, vindex: __m128i, a: __m128) {
    static_assert_imm8_scale!(SCALE);
    vscatterdps_128(base_addr as _, 0xff, vindex.as_i32x4(), a.as_f32x4(), SCALE)
}

/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
/// mask bit is not set are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mask_i32scatter_ps<const SCALE: i32>(
    base_addr: *mut u8,
    k: __mmask8,
    vindex: __m128i,
    a: __m128,
) {
    static_assert_imm8_scale!(SCALE);
    vscatterdps_128(base_addr as _, k, vindex.as_i32x4(), a.as_f32x4(), SCALE)
}

/// Stores 2 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
/// indices stored in vindex scaled by scale
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_i64scatter_epi32<const SCALE: i32>(
    base_addr: *mut u8,
    vindex: __m128i,
    a: __m128i,
) {
    static_assert_imm8_scale!(SCALE);
    vpscatterqd_128(base_addr as _, 0xff, vindex.as_i64x2(), a.as_i32x4(), SCALE)
}

/// Stores 2 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
/// are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mask_i64scatter_epi32<const SCALE: i32>(
    base_addr: *mut u8,
    k: __mmask8,
    vindex: __m128i,
    a: __m128i,
) {
    static_assert_imm8_scale!(SCALE);
    vpscatterqd_128(base_addr as _, k, vindex.as_i64x2(), a.as_i32x4(), SCALE)
}

/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
/// indices stored in vindex scaled by scale
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_i64scatter_epi64<const SCALE: i32>(
    base_addr: *mut u8,
    vindex: __m128i,
    a: __m128i,
) {
    static_assert_imm8_scale!(SCALE);
    vpscatterqq_128(base_addr as _, 0xff, vindex.as_i64x2(), a.as_i64x2(), SCALE)
}

/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
/// are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mask_i64scatter_epi64<const SCALE: i32>(
    base_addr: *mut u8,
    k: __mmask8,
    vindex: __m128i,
    a: __m128i,
) {
    static_assert_imm8_scale!(SCALE);
    vpscatterqq_128(base_addr as _, k, vindex.as_i64x2(), a.as_i64x2(), SCALE)
}

/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 64-bit integer indices stored in vindex scaled by scale
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_i64scatter_pd<const SCALE: i32>(base_addr: *mut u8, vindex: __m128i, a: __m128d) {
    static_assert_imm8_scale!(SCALE);
    vscatterqpd_128(base_addr as _, 0xff, vindex.as_i64x2(), a.as_f64x2(), SCALE)
}

/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
/// mask bit is not set are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mask_i64scatter_pd<const SCALE: i32>(
    base_addr: *mut u8,
    k: __mmask8,
    vindex: __m128i,
    a: __m128d,
) {
    static_assert_imm8_scale!(SCALE);
    vscatterqpd_128(base_addr as _, k, vindex.as_i64x2(), a.as_f64x2(), SCALE)
}

/// Stores 2 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 64-bit integer indices stored in vindex scaled by scale
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_i64scatter_ps<const SCALE: i32>(base_addr: *mut u8, vindex: __m128i, a: __m128) {
    static_assert_imm8_scale!(SCALE);
    vscatterqps_128(base_addr as _, 0xff, vindex.as_i64x2(), a.as_f32x4(), SCALE)
}

/// Stores 2 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
/// mask bit is not set are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mask_i64scatter_ps<const SCALE: i32>(
    base_addr: *mut u8,
    k: __mmask8,
    vindex: __m128i,
    a: __m128,
) {
    static_assert_imm8_scale!(SCALE);
    vscatterqps_128(base_addr as _, k, vindex.as_i64x2(), a.as_f32x4(), SCALE)
}

/// Loads 4 32-bit integer elements from memory starting at location base_addr at packed 32-bit integer
/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
/// mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_epi32)
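///
/// # Example
///
/// A minimal usage sketch (illustration only; marked `ignore` because
/// AVX-512F/AVX-512VL support cannot be assumed when doctests run):
///
/// ```ignore
/// let table = [10i32, 20, 30, 40];
/// let src = _mm_set1_epi32(0);
/// let vindex = _mm_setr_epi32(3, 2, 1, 0);
/// // Mask 0b0101 gathers lanes 0 and 2; lanes 1 and 3 keep the value from `src`.
/// let r = unsafe { _mm_mmask_i32gather_epi32::<4>(src, 0b0101, vindex, table.as_ptr().cast()) };
/// // r holds [40, 0, 20, 0].
/// ```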
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mmask_i32gather_epi32<const SCALE: i32>(
    src: __m128i,
    k: __mmask8,
    vindex: __m128i,
    base_addr: *const u8,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    transmute(vpgatherdd_128(
        src.as_i32x4(),
        base_addr as _,
        vindex.as_i32x4(),
        k,
        SCALE,
    ))
}

/// Loads 2 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
/// mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mmask_i32gather_epi64<const SCALE: i32>(
    src: __m128i,
    k: __mmask8,
    vindex: __m128i,
    base_addr: *const u8,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    transmute(vpgatherdq_128(
        src.as_i64x2(),
        base_addr as _,
        vindex.as_i32x4(),
        k,
        SCALE,
    ))
}

/// Loads 2 double-precision (64-bit) floating-point elements from memory starting at location base_addr
/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
/// from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mmask_i32gather_pd<const SCALE: i32>(
    src: __m128d,
    k: __mmask8,
    vindex: __m128i,
    base_addr: *const u8,
) -> __m128d {
    static_assert_imm8_scale!(SCALE);
    transmute(vgatherdpd_128(
        src.as_f64x2(),
        base_addr as _,
        vindex.as_i32x4(),
        k,
        SCALE,
    ))
}

/// Loads 4 single-precision (32-bit) floating-point elements from memory starting at location base_addr
/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
/// from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mmask_i32gather_ps<const SCALE: i32>(
    src: __m128,
    k: __mmask8,
    vindex: __m128i,
    base_addr: *const u8,
) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    transmute(vgatherdps_128(
        src.as_f32x4(),
        base_addr as _,
        vindex.as_i32x4(),
        k,
        SCALE,
    ))
}

/// Loads 2 32-bit integer elements from memory starting at location base_addr at packed 64-bit integer
/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
/// mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mmask_i64gather_epi32<const SCALE: i32>(
    src: __m128i,
    k: __mmask8,
    vindex: __m128i,
    base_addr: *const u8,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    transmute(vpgatherqd_128(
        src.as_i32x4(),
        base_addr as _,
        vindex.as_i64x2(),
        k,
        SCALE,
    ))
}

/// Loads 2 64-bit integer elements from memory starting at location base_addr at packed 64-bit integer
/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
/// mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mmask_i64gather_epi64<const SCALE: i32>(
    src: __m128i,
    k: __mmask8,
    vindex: __m128i,
    base_addr: *const u8,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    transmute(vpgatherqq_128(
        src.as_i64x2(),
        base_addr as _,
        vindex.as_i64x2(),
        k,
        SCALE,
    ))
}

/// Loads 2 double-precision (64-bit) floating-point elements from memory starting at location base_addr
/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
/// from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mmask_i64gather_pd<const SCALE: i32>(
    src: __m128d,
    k: __mmask8,
    vindex: __m128i,
    base_addr: *const u8,
) -> __m128d {
    static_assert_imm8_scale!(SCALE);
    transmute(vgatherqpd_128(
        src.as_f64x2(),
        base_addr as _,
        vindex.as_i64x2(),
        k,
        SCALE,
    ))
}

/// Loads 2 single-precision (32-bit) floating-point elements from memory starting at location base_addr
/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
/// from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mmask_i64gather_ps<const SCALE: i32>(
    src: __m128,
    k: __mmask8,
    vindex: __m128i,
    base_addr: *const u8,
) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    transmute(vgatherqps_128(
        src.as_f32x4(),
        base_addr as _,
        vindex.as_i64x2(),
        k,
        SCALE,
    ))
}

/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_epi32&expand=1198)
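///
/// # Example
///
/// A minimal usage sketch (illustration only; marked `ignore` because
/// AVX-512F support cannot be assumed when doctests run):
///
/// ```ignore
/// let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
/// let src = _mm512_set1_epi32(-1);
/// // Keep only the even-indexed lanes: they are packed contiguously into the
/// // low half of the result, and the upper lanes are taken from `src`.
/// let r = _mm512_mask_compress_epi32(src, 0b0101_0101_0101_0101, a);
/// // r holds [0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1, -1].
/// ```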
18340#[inline]
18341#[target_feature(enable = "avx512f")]
18342#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18343#[cfg_attr(test, assert_instr(vpcompressd))]
18344pub fn _mm512_mask_compress_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
18345 unsafe { transmute(src:vpcompressd(a.as_i32x16(), src.as_i32x16(), mask:k)) }
18346}
18347
18348/// Contiguously store the active 32-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18349///
18350/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_epi32&expand=1199)
18351#[inline]
18352#[target_feature(enable = "avx512f")]
18353#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18354#[cfg_attr(test, assert_instr(vpcompressd))]
18355pub fn _mm512_maskz_compress_epi32(k: __mmask16, a: __m512i) -> __m512i {
18356 unsafe { transmute(src:vpcompressd(a.as_i32x16(), src:i32x16::ZERO, mask:k)) }
18357}
18358
18359/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18360///
18361/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_epi32&expand=1196)
18362#[inline]
18363#[target_feature(enable = "avx512f,avx512vl")]
18364#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18365#[cfg_attr(test, assert_instr(vpcompressd))]
18366pub fn _mm256_mask_compress_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    unsafe { transmute(vpcompressd256(a.as_i32x8(), src.as_i32x8(), k)) }
18368}
18369
18370/// Contiguously store the active 32-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18371///
18372/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_epi32&expand=1197)
18373#[inline]
18374#[target_feature(enable = "avx512f,avx512vl")]
18375#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18376#[cfg_attr(test, assert_instr(vpcompressd))]
18377pub fn _mm256_maskz_compress_epi32(k: __mmask8, a: __m256i) -> __m256i {
    unsafe { transmute(vpcompressd256(a.as_i32x8(), i32x8::ZERO, k)) }
18379}
18380
18381/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18382///
18383/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_epi32&expand=1194)
18384#[inline]
18385#[target_feature(enable = "avx512f,avx512vl")]
18386#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18387#[cfg_attr(test, assert_instr(vpcompressd))]
18388pub fn _mm_mask_compress_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpcompressd128(a.as_i32x4(), src.as_i32x4(), k)) }
18390}
18391
18392/// Contiguously store the active 32-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18393///
18394/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_epi32&expand=1195)
18395#[inline]
18396#[target_feature(enable = "avx512f,avx512vl")]
18397#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18398#[cfg_attr(test, assert_instr(vpcompressd))]
18399pub fn _mm_maskz_compress_epi32(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpcompressd128(a.as_i32x4(), i32x4::ZERO, k)) }
18401}
18402
18403/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18404///
18405/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_epi64&expand=1204)
18406#[inline]
18407#[target_feature(enable = "avx512f")]
18408#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18409#[cfg_attr(test, assert_instr(vpcompressq))]
18410pub fn _mm512_mask_compress_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
    unsafe { transmute(vpcompressq(a.as_i64x8(), src.as_i64x8(), k)) }
18412}
18413
18414/// Contiguously store the active 64-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18415///
18416/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_epi64&expand=1205)
18417#[inline]
18418#[target_feature(enable = "avx512f")]
18419#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18420#[cfg_attr(test, assert_instr(vpcompressq))]
18421pub fn _mm512_maskz_compress_epi64(k: __mmask8, a: __m512i) -> __m512i {
    unsafe { transmute(vpcompressq(a.as_i64x8(), i64x8::ZERO, k)) }
18423}
18424
18425/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18426///
18427/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_epi64&expand=1202)
18428#[inline]
18429#[target_feature(enable = "avx512f,avx512vl")]
18430#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18431#[cfg_attr(test, assert_instr(vpcompressq))]
18432pub fn _mm256_mask_compress_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    unsafe { transmute(vpcompressq256(a.as_i64x4(), src.as_i64x4(), k)) }
18434}
18435
18436/// Contiguously store the active 64-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18437///
18438/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_epi64&expand=1203)
18439#[inline]
18440#[target_feature(enable = "avx512f,avx512vl")]
18441#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18442#[cfg_attr(test, assert_instr(vpcompressq))]
18443pub fn _mm256_maskz_compress_epi64(k: __mmask8, a: __m256i) -> __m256i {
    unsafe { transmute(vpcompressq256(a.as_i64x4(), i64x4::ZERO, k)) }
18445}
18446
18447/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18448///
18449/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_epi64&expand=1200)
18450#[inline]
18451#[target_feature(enable = "avx512f,avx512vl")]
18452#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18453#[cfg_attr(test, assert_instr(vpcompressq))]
18454pub fn _mm_mask_compress_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpcompressq128(a.as_i64x2(), src.as_i64x2(), k)) }
18456}
18457
18458/// Contiguously store the active 64-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18459///
18460/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_epi64&expand=1201)
18461#[inline]
18462#[target_feature(enable = "avx512f,avx512vl")]
18463#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18464#[cfg_attr(test, assert_instr(vpcompressq))]
18465pub fn _mm_maskz_compress_epi64(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpcompressq128(a.as_i64x2(), i64x2::ZERO, k)) }
18467}
18468
18469/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18470///
18471/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_ps&expand=1222)
18472#[inline]
18473#[target_feature(enable = "avx512f")]
18474#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18475#[cfg_attr(test, assert_instr(vcompressps))]
18476pub fn _mm512_mask_compress_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    unsafe { transmute(vcompressps(a.as_f32x16(), src.as_f32x16(), k)) }
18478}
18479
18480/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18481///
18482/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_ps&expand=1223)
18483#[inline]
18484#[target_feature(enable = "avx512f")]
18485#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18486#[cfg_attr(test, assert_instr(vcompressps))]
18487pub fn _mm512_maskz_compress_ps(k: __mmask16, a: __m512) -> __m512 {
    unsafe { transmute(vcompressps(a.as_f32x16(), f32x16::ZERO, k)) }
18489}
18490
18491/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18492///
18493/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_ps&expand=1220)
18494#[inline]
18495#[target_feature(enable = "avx512f,avx512vl")]
18496#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18497#[cfg_attr(test, assert_instr(vcompressps))]
18498pub fn _mm256_mask_compress_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
    unsafe { transmute(vcompressps256(a.as_f32x8(), src.as_f32x8(), k)) }
18500}
18501
18502/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18503///
18504/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_ps&expand=1221)
18505#[inline]
18506#[target_feature(enable = "avx512f,avx512vl")]
18507#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18508#[cfg_attr(test, assert_instr(vcompressps))]
18509pub fn _mm256_maskz_compress_ps(k: __mmask8, a: __m256) -> __m256 {
    unsafe { transmute(vcompressps256(a.as_f32x8(), f32x8::ZERO, k)) }
18511}
18512
18513/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18514///
18515/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_ps&expand=1218)
18516#[inline]
18517#[target_feature(enable = "avx512f,avx512vl")]
18518#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18519#[cfg_attr(test, assert_instr(vcompressps))]
18520pub fn _mm_mask_compress_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    unsafe { transmute(vcompressps128(a.as_f32x4(), src.as_f32x4(), k)) }
18522}
18523
18524/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18525///
18526/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_ps&expand=1219)
18527#[inline]
18528#[target_feature(enable = "avx512f,avx512vl")]
18529#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18530#[cfg_attr(test, assert_instr(vcompressps))]
18531pub fn _mm_maskz_compress_ps(k: __mmask8, a: __m128) -> __m128 {
    unsafe { transmute(vcompressps128(a.as_f32x4(), f32x4::ZERO, k)) }
18533}
18534
18535/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18536///
18537/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_pd&expand=1216)
18538#[inline]
18539#[target_feature(enable = "avx512f")]
18540#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18541#[cfg_attr(test, assert_instr(vcompresspd))]
18542pub fn _mm512_mask_compress_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    unsafe { transmute(vcompresspd(a.as_f64x8(), src.as_f64x8(), k)) }
18544}
18545
18546/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18547///
18548/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_pd&expand=1217)
18549#[inline]
18550#[target_feature(enable = "avx512f")]
18551#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18552#[cfg_attr(test, assert_instr(vcompresspd))]
18553pub fn _mm512_maskz_compress_pd(k: __mmask8, a: __m512d) -> __m512d {
    unsafe { transmute(vcompresspd(a.as_f64x8(), f64x8::ZERO, k)) }
18555}
18556
18557/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18558///
18559/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_pd&expand=1214)
18560#[inline]
18561#[target_feature(enable = "avx512f,avx512vl")]
18562#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18563#[cfg_attr(test, assert_instr(vcompresspd))]
18564pub fn _mm256_mask_compress_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
    unsafe { transmute(vcompresspd256(a.as_f64x4(), src.as_f64x4(), k)) }
18566}
18567
18568/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18569///
18570/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_pd&expand=1215)
18571#[inline]
18572#[target_feature(enable = "avx512f,avx512vl")]
18573#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18574#[cfg_attr(test, assert_instr(vcompresspd))]
18575pub fn _mm256_maskz_compress_pd(k: __mmask8, a: __m256d) -> __m256d {
    unsafe { transmute(vcompresspd256(a.as_f64x4(), f64x4::ZERO, k)) }
18577}
18578
18579/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18580///
18581/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_pd&expand=1212)
18582#[inline]
18583#[target_feature(enable = "avx512f,avx512vl")]
18584#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18585#[cfg_attr(test, assert_instr(vcompresspd))]
18586pub fn _mm_mask_compress_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
    unsafe { transmute(vcompresspd128(a.as_f64x2(), src.as_f64x2(), k)) }
18588}
18589
18590/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18591///
18592/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_pd&expand=1213)
18593#[inline]
18594#[target_feature(enable = "avx512f,avx512vl")]
18595#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18596#[cfg_attr(test, assert_instr(vcompresspd))]
18597pub fn _mm_maskz_compress_pd(k: __mmask8, a: __m128d) -> __m128d {
    unsafe { transmute(vcompresspd128(a.as_f64x2(), f64x2::ZERO, k)) }
18599}
18600
18601/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18602///
18603/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_epi32)
18604#[inline]
18605#[target_feature(enable = "avx512f")]
18606#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18607#[cfg_attr(test, assert_instr(vpcompressd))]
18608pub unsafe fn _mm512_mask_compressstoreu_epi32(base_addr: *mut u8, k: __mmask16, a: __m512i) {
    vcompressstored(base_addr as *mut _, a.as_i32x16(), k)
18610}
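
// A minimal stream-compaction sketch (not part of the stdarch API surface): writing only
// the selected lanes of `a` contiguously into `out` and reporting how many were written.
// The helper name is an assumption for illustration; the caller must guarantee `out` has
// room for up to 16 `i32` values, since the unaligned store writes `k.count_ones()`
// elements starting at `out`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn compress_store_example(out: *mut i32, k: __mmask16, a: __m512i) -> usize {
    _mm512_mask_compressstoreu_epi32(out as *mut u8, k, a);
    k.count_ones() as usize
}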
18611
18612/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18613///
18614/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_epi32)
18615#[inline]
18616#[target_feature(enable = "avx512f,avx512vl")]
18617#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18618#[cfg_attr(test, assert_instr(vpcompressd))]
18619pub unsafe fn _mm256_mask_compressstoreu_epi32(base_addr: *mut u8, k: __mmask8, a: __m256i) {
    vcompressstored256(base_addr as *mut _, a.as_i32x8(), k)
18621}
18622
18623/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18624///
18625/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_epi32)
18626#[inline]
18627#[target_feature(enable = "avx512f,avx512vl")]
18628#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18629#[cfg_attr(test, assert_instr(vpcompressd))]
18630pub unsafe fn _mm_mask_compressstoreu_epi32(base_addr: *mut u8, k: __mmask8, a: __m128i) {
    vcompressstored128(base_addr as *mut _, a.as_i32x4(), k)
18632}
18633
18634/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18635///
18636/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_epi64)
18637#[inline]
18638#[target_feature(enable = "avx512f")]
18639#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18640#[cfg_attr(test, assert_instr(vpcompressq))]
18641pub unsafe fn _mm512_mask_compressstoreu_epi64(base_addr: *mut u8, k: __mmask8, a: __m512i) {
    vcompressstoreq(base_addr as *mut _, a.as_i64x8(), k)
18643}
18644
18645/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18646///
18647/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_epi64)
18648#[inline]
18649#[target_feature(enable = "avx512f,avx512vl")]
18650#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18651#[cfg_attr(test, assert_instr(vpcompressq))]
18652pub unsafe fn _mm256_mask_compressstoreu_epi64(base_addr: *mut u8, k: __mmask8, a: __m256i) {
    vcompressstoreq256(base_addr as *mut _, a.as_i64x4(), k)
18654}
18655
18656/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18657///
18658/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_epi64)
18659#[inline]
18660#[target_feature(enable = "avx512f,avx512vl")]
18661#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18662#[cfg_attr(test, assert_instr(vpcompressq))]
18663pub unsafe fn _mm_mask_compressstoreu_epi64(base_addr: *mut u8, k: __mmask8, a: __m128i) {
    vcompressstoreq128(base_addr as *mut _, a.as_i64x2(), k)
18665}
18666
18667/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18668///
18669/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_ps)
18670#[inline]
18671#[target_feature(enable = "avx512f")]
18672#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18673#[cfg_attr(test, assert_instr(vcompressps))]
18674pub unsafe fn _mm512_mask_compressstoreu_ps(base_addr: *mut u8, k: __mmask16, a: __m512) {
    vcompressstoreps(base_addr as *mut _, a.as_f32x16(), k)
18676}
18677
18678/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18679///
18680/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_ps)
18681#[inline]
18682#[target_feature(enable = "avx512f,avx512vl")]
18683#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18684#[cfg_attr(test, assert_instr(vcompressps))]
18685pub unsafe fn _mm256_mask_compressstoreu_ps(base_addr: *mut u8, k: __mmask8, a: __m256) {
    vcompressstoreps256(base_addr as *mut _, a.as_f32x8(), k)
18687}
18688
18689/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18690///
18691/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_ps)
18692#[inline]
18693#[target_feature(enable = "avx512f,avx512vl")]
18694#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18695#[cfg_attr(test, assert_instr(vcompressps))]
18696pub unsafe fn _mm_mask_compressstoreu_ps(base_addr: *mut u8, k: __mmask8, a: __m128) {
    vcompressstoreps128(base_addr as *mut _, a.as_f32x4(), k)
18698}
18699
18700/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18701///
18702/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_pd)
18703#[inline]
18704#[target_feature(enable = "avx512f")]
18705#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18706#[cfg_attr(test, assert_instr(vcompresspd))]
18707pub unsafe fn _mm512_mask_compressstoreu_pd(base_addr: *mut u8, k: __mmask8, a: __m512d) {
    vcompressstorepd(base_addr as *mut _, a.as_f64x8(), k)
18709}
18710
18711/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18712///
18713/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_pd)
18714#[inline]
18715#[target_feature(enable = "avx512f,avx512vl")]
18716#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18717#[cfg_attr(test, assert_instr(vcompresspd))]
18718pub unsafe fn _mm256_mask_compressstoreu_pd(base_addr: *mut u8, k: __mmask8, a: __m256d) {
    vcompressstorepd256(base_addr as *mut _, a.as_f64x4(), k)
18720}
18721
18722/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18723///
18724/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_pd)
18725#[inline]
18726#[target_feature(enable = "avx512f,avx512vl")]
18727#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18728#[cfg_attr(test, assert_instr(vcompresspd))]
18729pub unsafe fn _mm_mask_compressstoreu_pd(base_addr: *mut u8, k: __mmask8, a: __m128d) {
    vcompressstorepd128(base_addr as *mut _, a.as_f64x2(), k)
18731}
18732
18733/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18734///
18735/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_epi32&expand=2316)
18736#[inline]
18737#[target_feature(enable = "avx512f")]
18738#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18739#[cfg_attr(test, assert_instr(vpexpandd))]
18740pub fn _mm512_mask_expand_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
    unsafe { transmute(vpexpandd(a.as_i32x16(), src.as_i32x16(), k)) }
18742}
18743
18744/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18745///
18746/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_epi32&expand=2317)
18747#[inline]
18748#[target_feature(enable = "avx512f")]
18749#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18750#[cfg_attr(test, assert_instr(vpexpandd))]
18751pub fn _mm512_maskz_expand_epi32(k: __mmask16, a: __m512i) -> __m512i {
    unsafe { transmute(vpexpandd(a.as_i32x16(), i32x16::ZERO, k)) }
18753}
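
// A minimal usage sketch (not part of the stdarch API surface): `expand` is the inverse of
// `compress`. The low elements of `a` are read in order and scattered to the lane positions
// whose mask bit is set; with the zeroing form the remaining lanes become zero. The helper
// name and mask value are assumptions for illustration only.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn expand_to_odd_lanes_example(a: __m512i) -> __m512i {
    // a[0], a[1], ... are placed into lanes 1, 3, 5, ...; the even lanes are zeroed.
    _mm512_maskz_expand_epi32(0b1010_1010_1010_1010, a)
}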
18754
18755/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18756///
18757/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_epi32&expand=2314)
18758#[inline]
18759#[target_feature(enable = "avx512f,avx512vl")]
18760#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18761#[cfg_attr(test, assert_instr(vpexpandd))]
18762pub fn _mm256_mask_expand_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    unsafe { transmute(vpexpandd256(a.as_i32x8(), src.as_i32x8(), k)) }
18764}
18765
18766/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18767///
18768/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_epi32&expand=2315)
18769#[inline]
18770#[target_feature(enable = "avx512f,avx512vl")]
18771#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18772#[cfg_attr(test, assert_instr(vpexpandd))]
18773pub fn _mm256_maskz_expand_epi32(k: __mmask8, a: __m256i) -> __m256i {
    unsafe { transmute(vpexpandd256(a.as_i32x8(), i32x8::ZERO, k)) }
18775}
18776
18777/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18778///
18779/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_epi32&expand=2312)
18780#[inline]
18781#[target_feature(enable = "avx512f,avx512vl")]
18782#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18783#[cfg_attr(test, assert_instr(vpexpandd))]
18784pub fn _mm_mask_expand_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpexpandd128(a.as_i32x4(), src.as_i32x4(), k)) }
18786}
18787
18788/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18789///
18790/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_epi32&expand=2313)
18791#[inline]
18792#[target_feature(enable = "avx512f,avx512vl")]
18793#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18794#[cfg_attr(test, assert_instr(vpexpandd))]
18795pub fn _mm_maskz_expand_epi32(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpexpandd128(a.as_i32x4(), i32x4::ZERO, k)) }
18797}
18798
18799/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18800///
18801/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_epi64&expand=2322)
18802#[inline]
18803#[target_feature(enable = "avx512f")]
18804#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18805#[cfg_attr(test, assert_instr(vpexpandq))]
18806pub fn _mm512_mask_expand_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
    unsafe { transmute(vpexpandq(a.as_i64x8(), src.as_i64x8(), k)) }
18808}
18809
18810/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18811///
18812/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_epi64&expand=2323)
18813#[inline]
18814#[target_feature(enable = "avx512f")]
18815#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18816#[cfg_attr(test, assert_instr(vpexpandq))]
18817pub fn _mm512_maskz_expand_epi64(k: __mmask8, a: __m512i) -> __m512i {
    unsafe { transmute(vpexpandq(a.as_i64x8(), i64x8::ZERO, k)) }
18819}
18820
18821/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18822///
18823/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_epi64&expand=2320)
18824#[inline]
18825#[target_feature(enable = "avx512f,avx512vl")]
18826#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18827#[cfg_attr(test, assert_instr(vpexpandq))]
18828pub fn _mm256_mask_expand_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    unsafe { transmute(vpexpandq256(a.as_i64x4(), src.as_i64x4(), k)) }
18830}
18831
18832/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18833///
18834/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_epi64&expand=2321)
18835#[inline]
18836#[target_feature(enable = "avx512f,avx512vl")]
18837#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18838#[cfg_attr(test, assert_instr(vpexpandq))]
18839pub fn _mm256_maskz_expand_epi64(k: __mmask8, a: __m256i) -> __m256i {
    unsafe { transmute(vpexpandq256(a.as_i64x4(), i64x4::ZERO, k)) }
18841}
18842
18843/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18844///
18845/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_epi64&expand=2318)
18846#[inline]
18847#[target_feature(enable = "avx512f,avx512vl")]
18848#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18849#[cfg_attr(test, assert_instr(vpexpandq))]
18850pub fn _mm_mask_expand_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpexpandq128(a.as_i64x2(), src.as_i64x2(), k)) }
18852}
18853
18854/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18855///
18856/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_epi64&expand=2319)
18857#[inline]
18858#[target_feature(enable = "avx512f,avx512vl")]
18859#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18860#[cfg_attr(test, assert_instr(vpexpandq))]
18861pub fn _mm_maskz_expand_epi64(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpexpandq128(a.as_i64x2(), i64x2::ZERO, k)) }
18863}
18864
18865/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18866///
18867/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_ps&expand=2340)
18868#[inline]
18869#[target_feature(enable = "avx512f")]
18870#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18871#[cfg_attr(test, assert_instr(vexpandps))]
18872pub fn _mm512_mask_expand_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    unsafe { transmute(vexpandps(a.as_f32x16(), src.as_f32x16(), k)) }
18874}
18875
18876/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18877///
18878/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_ps&expand=2341)
18879#[inline]
18880#[target_feature(enable = "avx512f")]
18881#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18882#[cfg_attr(test, assert_instr(vexpandps))]
18883pub fn _mm512_maskz_expand_ps(k: __mmask16, a: __m512) -> __m512 {
    unsafe { transmute(vexpandps(a.as_f32x16(), f32x16::ZERO, k)) }
18885}
18886
18887/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18888///
18889/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_ps&expand=2338)
18890#[inline]
18891#[target_feature(enable = "avx512f,avx512vl")]
18892#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18893#[cfg_attr(test, assert_instr(vexpandps))]
18894pub fn _mm256_mask_expand_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
    unsafe { transmute(vexpandps256(a.as_f32x8(), src.as_f32x8(), k)) }
18896}
18897
18898/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18899///
18900/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_ps&expand=2339)
18901#[inline]
18902#[target_feature(enable = "avx512f,avx512vl")]
18903#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18904#[cfg_attr(test, assert_instr(vexpandps))]
18905pub fn _mm256_maskz_expand_ps(k: __mmask8, a: __m256) -> __m256 {
    unsafe { transmute(vexpandps256(a.as_f32x8(), f32x8::ZERO, k)) }
18907}
18908
18909/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18910///
18911/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_ps&expand=2336)
18912#[inline]
18913#[target_feature(enable = "avx512f,avx512vl")]
18914#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18915#[cfg_attr(test, assert_instr(vexpandps))]
18916pub fn _mm_mask_expand_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    unsafe { transmute(vexpandps128(a.as_f32x4(), src.as_f32x4(), k)) }
18918}
18919
18920/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18921///
18922/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_ps&expand=2337)
18923#[inline]
18924#[target_feature(enable = "avx512f,avx512vl")]
18925#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18926#[cfg_attr(test, assert_instr(vexpandps))]
18927pub fn _mm_maskz_expand_ps(k: __mmask8, a: __m128) -> __m128 {
    unsafe { transmute(vexpandps128(a.as_f32x4(), f32x4::ZERO, k)) }
18929}
18930
18931/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18932///
18933/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_pd&expand=2334)
18934#[inline]
18935#[target_feature(enable = "avx512f")]
18936#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18937#[cfg_attr(test, assert_instr(vexpandpd))]
18938pub fn _mm512_mask_expand_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    unsafe { transmute(vexpandpd(a.as_f64x8(), src.as_f64x8(), k)) }
18940}
18941
18942/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18943///
18944/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_pd&expand=2335)
18945#[inline]
18946#[target_feature(enable = "avx512f")]
18947#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18948#[cfg_attr(test, assert_instr(vexpandpd))]
18949pub fn _mm512_maskz_expand_pd(k: __mmask8, a: __m512d) -> __m512d {
    unsafe { transmute(vexpandpd(a.as_f64x8(), f64x8::ZERO, k)) }
18951}
18952
18953/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18954///
18955/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_pd&expand=2332)
18956#[inline]
18957#[target_feature(enable = "avx512f,avx512vl")]
18958#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18959#[cfg_attr(test, assert_instr(vexpandpd))]
18960pub fn _mm256_mask_expand_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
    unsafe { transmute(vexpandpd256(a.as_f64x4(), src.as_f64x4(), k)) }
18962}
18963
18964/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18965///
18966/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_pd&expand=2333)
18967#[inline]
18968#[target_feature(enable = "avx512f,avx512vl")]
18969#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18970#[cfg_attr(test, assert_instr(vexpandpd))]
18971pub fn _mm256_maskz_expand_pd(k: __mmask8, a: __m256d) -> __m256d {
    unsafe { transmute(vexpandpd256(a.as_f64x4(), i64x4::ZERO, k)) }
18973}
18974
18975/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18976///
18977/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_pd&expand=2330)
18978#[inline]
18979#[target_feature(enable = "avx512f,avx512vl")]
18980#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18981#[cfg_attr(test, assert_instr(vexpandpd))]
18982pub fn _mm_mask_expand_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
    unsafe { transmute(vexpandpd128(a.as_f64x2(), src.as_f64x2(), k)) }
18984}
18985
18986/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18987///
18988/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_pd&expand=2331)
18989#[inline]
18990#[target_feature(enable = "avx512f,avx512vl")]
18991#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18992#[cfg_attr(test, assert_instr(vexpandpd))]
18993pub fn _mm_maskz_expand_pd(k: __mmask8, a: __m128d) -> __m128d {
    unsafe { transmute(vexpandpd128(a.as_f64x2(), f64x2::ZERO, k)) }
18995}
18996
18997/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
18998///
18999/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rol_epi32&expand=4685)
19000#[inline]
19001#[target_feature(enable = "avx512f")]
19002#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19003#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19004#[rustc_legacy_const_generics(1)]
19005pub fn _mm512_rol_epi32<const IMM8: i32>(a: __m512i) -> __m512i {
19006 unsafe {
19007 static_assert_uimm_bits!(IMM8, 8);
19008 let a: i32x16 = a.as_i32x16();
19009 let r: i32x16 = vprold(a, IMM8);
        transmute(r)
19011 }
19012}
19013
19014/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19015///
19016/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rol_epi32&expand=4683)
19017#[inline]
19018#[target_feature(enable = "avx512f")]
19019#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19020#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19021#[rustc_legacy_const_generics(3)]
19022pub fn _mm512_mask_rol_epi32<const IMM8: i32>(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
19023 unsafe {
19024 static_assert_uimm_bits!(IMM8, 8);
19025 let a: i32x16 = a.as_i32x16();
19026 let r: i32x16 = vprold(a, IMM8);
        transmute(simd_select_bitmask(k, r, src.as_i32x16()))
19028 }
19029}
19030
19031/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19032///
19033/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rol_epi32&expand=4684)
19034#[inline]
19035#[target_feature(enable = "avx512f")]
19036#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19037#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19038#[rustc_legacy_const_generics(2)]
19039pub fn _mm512_maskz_rol_epi32<const IMM8: i32>(k: __mmask16, a: __m512i) -> __m512i {
19040 unsafe {
19041 static_assert_uimm_bits!(IMM8, 8);
19042 let a: i32x16 = a.as_i32x16();
19043 let r: i32x16 = vprold(a, IMM8);
        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
19045 }
19046}
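
// A minimal usage sketch (not part of the stdarch API surface), assuming an
// `avx512f`-enabled target: rotating every 32-bit lane left by a compile-time constant.
// The rotate count is a const generic, so it must be known at compile time; the helper
// name is an assumption for illustration only.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn rotate_left_by_7_example(a: __m512i) -> __m512i {
    // Bits shifted out of the top of each lane re-enter at the bottom of the same lane.
    _mm512_rol_epi32::<7>(a)
}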
19047
19048/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19049///
19050/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rol_epi32&expand=4682)
19051#[inline]
19052#[target_feature(enable = "avx512f,avx512vl")]
19053#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19054#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19055#[rustc_legacy_const_generics(1)]
19056pub fn _mm256_rol_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
19057 unsafe {
19058 static_assert_uimm_bits!(IMM8, 8);
19059 let a: i32x8 = a.as_i32x8();
19060 let r: i32x8 = vprold256(a, IMM8);
        transmute(r)
19062 }
19063}
19064
19065/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19066///
19067/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rol_epi32&expand=4680)
19068#[inline]
19069#[target_feature(enable = "avx512f,avx512vl")]
19070#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19071#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19072#[rustc_legacy_const_generics(3)]
19073pub fn _mm256_mask_rol_epi32<const IMM8: i32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
19074 unsafe {
19075 static_assert_uimm_bits!(IMM8, 8);
19076 let a: i32x8 = a.as_i32x8();
19077 let r: i32x8 = vprold256(a, IMM8);
        transmute(simd_select_bitmask(k, r, src.as_i32x8()))
19079 }
19080}
19081
19082/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19083///
19084/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rol_epi32&expand=4681)
19085#[inline]
19086#[target_feature(enable = "avx512f,avx512vl")]
19087#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19088#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19089#[rustc_legacy_const_generics(2)]
19090pub fn _mm256_maskz_rol_epi32<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
19091 unsafe {
19092 static_assert_uimm_bits!(IMM8, 8);
19093 let a: i32x8 = a.as_i32x8();
19094 let r: i32x8 = vprold256(a, IMM8);
        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
19096 }
19097}
19098
19099/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19100///
19101/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rol_epi32&expand=4679)
19102#[inline]
19103#[target_feature(enable = "avx512f,avx512vl")]
19104#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19105#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19106#[rustc_legacy_const_generics(1)]
19107pub fn _mm_rol_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
19108 unsafe {
19109 static_assert_uimm_bits!(IMM8, 8);
19110 let a: i32x4 = a.as_i32x4();
19111 let r: i32x4 = vprold128(a, IMM8);
        transmute(r)
19113 }
19114}
19115
19116/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19117///
19118/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rol_epi32&expand=4677)
19119#[inline]
19120#[target_feature(enable = "avx512f,avx512vl")]
19121#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19122#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19123#[rustc_legacy_const_generics(3)]
19124pub fn _mm_mask_rol_epi32<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19125 unsafe {
19126 static_assert_uimm_bits!(IMM8, 8);
19127 let a: i32x4 = a.as_i32x4();
19128 let r: i32x4 = vprold128(a, IMM8);
        transmute(simd_select_bitmask(k, r, src.as_i32x4()))
19130 }
19131}
19132
19133/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19134///
19135/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rol_epi32&expand=4678)
19136#[inline]
19137#[target_feature(enable = "avx512f,avx512vl")]
19138#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19139#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19140#[rustc_legacy_const_generics(2)]
19141pub fn _mm_maskz_rol_epi32<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
19142 unsafe {
19143 static_assert_uimm_bits!(IMM8, 8);
19144 let a: i32x4 = a.as_i32x4();
19145 let r: i32x4 = vprold128(a, IMM8);
        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
19147 }
19148}
19149
19150/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19151///
19152/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ror_epi32&expand=4721)
19153#[inline]
19154#[target_feature(enable = "avx512f")]
19155#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19156#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19157#[rustc_legacy_const_generics(1)]
19158pub fn _mm512_ror_epi32<const IMM8: i32>(a: __m512i) -> __m512i {
19159 unsafe {
19160 static_assert_uimm_bits!(IMM8, 8);
19161 let a: i32x16 = a.as_i32x16();
19162 let r: i32x16 = vprord(a, IMM8);
        transmute(r)
19164 }
19165}
19166
19167/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19168///
19169/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ror_epi32&expand=4719)
19170#[inline]
19171#[target_feature(enable = "avx512f")]
19172#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19173#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19174#[rustc_legacy_const_generics(3)]
19175pub fn _mm512_mask_ror_epi32<const IMM8: i32>(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
19176 unsafe {
19177 static_assert_uimm_bits!(IMM8, 8);
19178 let a: i32x16 = a.as_i32x16();
19179 let r: i32x16 = vprord(a, IMM8);
        transmute(simd_select_bitmask(k, r, src.as_i32x16()))
19181 }
19182}
19183
19184/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19185///
19186/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ror_epi32&expand=4720)
19187#[inline]
19188#[target_feature(enable = "avx512f")]
19189#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19190#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19191#[rustc_legacy_const_generics(2)]
19192pub fn _mm512_maskz_ror_epi32<const IMM8: i32>(k: __mmask16, a: __m512i) -> __m512i {
19193 unsafe {
19194 static_assert_uimm_bits!(IMM8, 8);
19195 let a: i32x16 = a.as_i32x16();
19196 let r: i32x16 = vprord(a, IMM8);
        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
19198 }
19199}
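
// A minimal usage sketch (not part of the stdarch API surface): for 32-bit lanes, a right
// rotation by N is the same permutation of bits as a left rotation by 32 - N, so either
// intrinsic family can express the other. The helper name is an assumption for
// illustration only.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn rotate_right_equivalence_example(a: __m512i) -> (__m512i, __m512i) {
    // Both results hold the same bit pattern in every lane.
    (_mm512_ror_epi32::<3>(a), _mm512_rol_epi32::<29>(a))
}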
19200
19201/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19202///
19203/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ror_epi32&expand=4718)
19204#[inline]
19205#[target_feature(enable = "avx512f,avx512vl")]
19206#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19207#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19208#[rustc_legacy_const_generics(1)]
19209pub fn _mm256_ror_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
19210 unsafe {
19211 static_assert_uimm_bits!(IMM8, 8);
19212 let a: i32x8 = a.as_i32x8();
19213 let r: i32x8 = vprord256(a, IMM8);
        transmute(r)
19215 }
19216}
19217
19218/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19219///
19220/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ror_epi32&expand=4716)
19221#[inline]
19222#[target_feature(enable = "avx512f,avx512vl")]
19223#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19224#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19225#[rustc_legacy_const_generics(3)]
19226pub fn _mm256_mask_ror_epi32<const IMM8: i32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
19227 unsafe {
19228 static_assert_uimm_bits!(IMM8, 8);
19229 let a: i32x8 = a.as_i32x8();
19230 let r: i32x8 = vprord256(a, IMM8);
        transmute(simd_select_bitmask(k, r, src.as_i32x8()))
19232 }
19233}
19234
19235/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19236///
19237/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ror_epi32&expand=4717)
19238#[inline]
19239#[target_feature(enable = "avx512f,avx512vl")]
19240#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19241#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19242#[rustc_legacy_const_generics(2)]
19243pub fn _mm256_maskz_ror_epi32<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
19244 unsafe {
19245 static_assert_uimm_bits!(IMM8, 8);
19246 let a: i32x8 = a.as_i32x8();
19247 let r: i32x8 = vprord256(a, IMM8);
        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
19249 }
19250}
19251
19252/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19253///
19254/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ror_epi32&expand=4715)
19255#[inline]
19256#[target_feature(enable = "avx512f,avx512vl")]
19257#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19258#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19259#[rustc_legacy_const_generics(1)]
19260pub fn _mm_ror_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
19261 unsafe {
19262 static_assert_uimm_bits!(IMM8, 8);
19263 let a: i32x4 = a.as_i32x4();
19264 let r: i32x4 = vprord128(a, IMM8);
        transmute(r)
19266 }
19267}
19268
19269/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19270///
19271/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ror_epi32&expand=4713)
19272#[inline]
19273#[target_feature(enable = "avx512f,avx512vl")]
19274#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19275#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19276#[rustc_legacy_const_generics(3)]
19277pub fn _mm_mask_ror_epi32<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19278 unsafe {
19279 static_assert_uimm_bits!(IMM8, 8);
19280 let a: i32x4 = a.as_i32x4();
19281 let r: i32x4 = vprord128(a, IMM8);
        transmute(simd_select_bitmask(k, r, src.as_i32x4()))
19283 }
19284}
19285
19286/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19287///
19288/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ror_epi32&expand=4714)
19289#[inline]
19290#[target_feature(enable = "avx512f,avx512vl")]
19291#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19292#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19293#[rustc_legacy_const_generics(2)]
19294pub fn _mm_maskz_ror_epi32<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
19295 unsafe {
19296 static_assert_uimm_bits!(IMM8, 8);
19297 let a: i32x4 = a.as_i32x4();
19298 let r: i32x4 = vprord128(a, IMM8);
        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
19300 }
19301}
19302
19303/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19304///
19305/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rol_epi64&expand=4694)
19306#[inline]
19307#[target_feature(enable = "avx512f")]
19308#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19309#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19310#[rustc_legacy_const_generics(1)]
19311pub fn _mm512_rol_epi64<const IMM8: i32>(a: __m512i) -> __m512i {
19312 unsafe {
19313 static_assert_uimm_bits!(IMM8, 8);
19314 let a: i64x8 = a.as_i64x8();
19315 let r: i64x8 = vprolq(a, IMM8);
        transmute(r)
19317 }
19318}
19319
19320/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19321///
19322/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rol_epi64&expand=4692)
19323#[inline]
19324#[target_feature(enable = "avx512f")]
19325#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19326#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19327#[rustc_legacy_const_generics(3)]
19328pub fn _mm512_mask_rol_epi64<const IMM8: i32>(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
19329 unsafe {
19330 static_assert_uimm_bits!(IMM8, 8);
19331 let a: i64x8 = a.as_i64x8();
19332 let r: i64x8 = vprolq(a, IMM8);
        transmute(simd_select_bitmask(k, r, src.as_i64x8()))
19334 }
19335}
19336
19337/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19338///
19339/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rol_epi64&expand=4693)
19340#[inline]
19341#[target_feature(enable = "avx512f")]
19342#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19343#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19344#[rustc_legacy_const_generics(2)]
19345pub fn _mm512_maskz_rol_epi64<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m512i {
19346 unsafe {
19347 static_assert_uimm_bits!(IMM8, 8);
19348 let a: i64x8 = a.as_i64x8();
19349 let r: i64x8 = vprolq(a, IMM8);
        transmute(simd_select_bitmask(k, r, i64x8::ZERO))
19351 }
19352}
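
// Hypothetical scalar model of the masking convention used by the `_mask_` and
// `_maskz_` variants above: a set bit in `k` keeps the freshly computed lane,
// while a clear bit falls back to the matching lane of `src` (writemask) or to
// zero (zeromask). Shown purely as an illustration of what `simd_select_bitmask`
// does; it is not called anywhere.
#[cfg(test)]
#[allow(dead_code)]
fn mask_select_epi64(k: u8, computed: [u64; 8], fallback: [u64; 8]) -> [u64; 8] {
    let mut out = fallback;
    for i in 0..8 {
        // Lane i is taken from `computed` only when bit i of the mask is set.
        if (k >> i) & 1 == 1 {
            out[i] = computed[i];
        }
    }
    out
}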
19353
19354/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19355///
19356/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rol_epi64&expand=4691)
19357#[inline]
19358#[target_feature(enable = "avx512f,avx512vl")]
19359#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19360#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19361#[rustc_legacy_const_generics(1)]
19362pub fn _mm256_rol_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
19363 unsafe {
19364 static_assert_uimm_bits!(IMM8, 8);
19365 let a: i64x4 = a.as_i64x4();
19366 let r: i64x4 = vprolq256(a, IMM8);
        transmute(r)
19368 }
19369}
19370
19371/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19372///
19373/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rol_epi64&expand=4689)
19374#[inline]
19375#[target_feature(enable = "avx512f,avx512vl")]
19376#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19377#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19378#[rustc_legacy_const_generics(3)]
19379pub fn _mm256_mask_rol_epi64<const IMM8: i32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
19380 unsafe {
19381 static_assert_uimm_bits!(IMM8, 8);
19382 let a: i64x4 = a.as_i64x4();
19383 let r: i64x4 = vprolq256(a, IMM8);
        transmute(simd_select_bitmask(k, r, src.as_i64x4()))
19385 }
19386}
19387
19388/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19389///
19390/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rol_epi64&expand=4690)
19391#[inline]
19392#[target_feature(enable = "avx512f,avx512vl")]
19393#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19394#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19395#[rustc_legacy_const_generics(2)]
19396pub fn _mm256_maskz_rol_epi64<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
19397 unsafe {
19398 static_assert_uimm_bits!(IMM8, 8);
19399 let a: i64x4 = a.as_i64x4();
19400 let r: i64x4 = vprolq256(a, IMM8);
        transmute(simd_select_bitmask(k, r, i64x4::ZERO))
19402 }
19403}
19404
19405/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19406///
19407/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rol_epi64&expand=4688)
19408#[inline]
19409#[target_feature(enable = "avx512f,avx512vl")]
19410#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19411#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19412#[rustc_legacy_const_generics(1)]
19413pub fn _mm_rol_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
19414 unsafe {
19415 static_assert_uimm_bits!(IMM8, 8);
19416 let a: i64x2 = a.as_i64x2();
19417 let r: i64x2 = vprolq128(a, IMM8);
        transmute(r)
19419 }
19420}
19421
19422/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19423///
19424/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rol_epi64&expand=4686)
19425#[inline]
19426#[target_feature(enable = "avx512f,avx512vl")]
19427#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19428#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19429#[rustc_legacy_const_generics(3)]
19430pub fn _mm_mask_rol_epi64<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19431 unsafe {
19432 static_assert_uimm_bits!(IMM8, 8);
19433 let a: i64x2 = a.as_i64x2();
19434 let r: i64x2 = vprolq128(a, IMM8);
        transmute(simd_select_bitmask(k, r, src.as_i64x2()))
19436 }
19437}
19438
19439/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19440///
19441/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rol_epi64&expand=4687)
19442#[inline]
19443#[target_feature(enable = "avx512f,avx512vl")]
19444#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19445#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19446#[rustc_legacy_const_generics(2)]
19447pub fn _mm_maskz_rol_epi64<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
19448 unsafe {
19449 static_assert_uimm_bits!(IMM8, 8);
19450 let a: i64x2 = a.as_i64x2();
19451 let r: i64x2 = vprolq128(a, IMM8);
        transmute(simd_select_bitmask(k, r, i64x2::ZERO))
19453 }
19454}
19455
19456/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19457///
19458/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ror_epi64&expand=4730)
19459#[inline]
19460#[target_feature(enable = "avx512f")]
19461#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19462#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19463#[rustc_legacy_const_generics(1)]
19464pub fn _mm512_ror_epi64<const IMM8: i32>(a: __m512i) -> __m512i {
19465 unsafe {
19466 static_assert_uimm_bits!(IMM8, 8);
19467 let a: i64x8 = a.as_i64x8();
19468 let r: i64x8 = vprorq(a, IMM8);
        transmute(r)
19470 }
19471}
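
// Scalar sketch of why a right rotation can always be re-expressed as a left
// rotation (which may be why the test assertions for the `ror_epi64` intrinsics
// in this module name `vprolq`): rotating a 64-bit lane right by `n` equals
// rotating it left by `(64 - n % 64) % 64`. This identity is illustrative only.
#[cfg(test)]
#[allow(dead_code)]
fn ror_matches_complementary_rol(x: u64, n: u32) -> bool {
    x.rotate_right(n % 64) == x.rotate_left((64 - n % 64) % 64)
}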
19472
19473/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19474///
19475/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ror_epi64&expand=4728)
19476#[inline]
19477#[target_feature(enable = "avx512f")]
19478#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19479#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19480#[rustc_legacy_const_generics(3)]
19481pub fn _mm512_mask_ror_epi64<const IMM8: i32>(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
19482 unsafe {
19483 static_assert_uimm_bits!(IMM8, 8);
19484 let a: i64x8 = a.as_i64x8();
19485 let r: i64x8 = vprorq(a, IMM8);
        transmute(simd_select_bitmask(k, r, src.as_i64x8()))
19487 }
19488}
19489
19490/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19491///
19492/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ror_epi64&expand=4729)
19493#[inline]
19494#[target_feature(enable = "avx512f")]
19495#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19496#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19497#[rustc_legacy_const_generics(2)]
19498pub fn _mm512_maskz_ror_epi64<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m512i {
19499 unsafe {
19500 static_assert_uimm_bits!(IMM8, 8);
19501 let a: i64x8 = a.as_i64x8();
19502 let r: i64x8 = vprorq(a, IMM8);
        transmute(simd_select_bitmask(k, r, i64x8::ZERO))
19504 }
19505}
19506
19507/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19508///
19509/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ror_epi64&expand=4727)
19510#[inline]
19511#[target_feature(enable = "avx512f,avx512vl")]
19512#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19513#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19514#[rustc_legacy_const_generics(1)]
19515pub fn _mm256_ror_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
19516 unsafe {
19517 static_assert_uimm_bits!(IMM8, 8);
19518 let a: i64x4 = a.as_i64x4();
19519 let r: i64x4 = vprorq256(a, IMM8);
        transmute(r)
19521 }
19522}
19523
19524/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19525///
19526/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ror_epi64&expand=4725)
19527#[inline]
19528#[target_feature(enable = "avx512f,avx512vl")]
19529#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19530#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19531#[rustc_legacy_const_generics(3)]
19532pub fn _mm256_mask_ror_epi64<const IMM8: i32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
19533 unsafe {
19534 static_assert_uimm_bits!(IMM8, 8);
19535 let a: i64x4 = a.as_i64x4();
19536 let r: i64x4 = vprorq256(a, IMM8);
        transmute(simd_select_bitmask(k, r, src.as_i64x4()))
19538 }
19539}
19540
19541/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19542///
19543/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ror_epi64&expand=4726)
19544#[inline]
19545#[target_feature(enable = "avx512f,avx512vl")]
19546#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19547#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19548#[rustc_legacy_const_generics(2)]
19549pub fn _mm256_maskz_ror_epi64<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
19550 unsafe {
19551 static_assert_uimm_bits!(IMM8, 8);
19552 let a: i64x4 = a.as_i64x4();
19553 let r: i64x4 = vprorq256(a, IMM8);
        transmute(simd_select_bitmask(k, r, i64x4::ZERO))
19555 }
19556}
19557
19558/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19559///
19560/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ror_epi64&expand=4724)
19561#[inline]
19562#[target_feature(enable = "avx512f,avx512vl")]
19563#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19564#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19565#[rustc_legacy_const_generics(1)]
19566pub fn _mm_ror_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
19567 unsafe {
19568 static_assert_uimm_bits!(IMM8, 8);
19569 let a: i64x2 = a.as_i64x2();
19570 let r: i64x2 = vprorq128(a, IMM8);
        transmute(r)
19572 }
19573}
19574
19575/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19576///
19577/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ror_epi64&expand=4722)
19578#[inline]
19579#[target_feature(enable = "avx512f,avx512vl")]
19580#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19581#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19582#[rustc_legacy_const_generics(3)]
19583pub fn _mm_mask_ror_epi64<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19584 unsafe {
19585 static_assert_uimm_bits!(IMM8, 8);
19586 let a: i64x2 = a.as_i64x2();
19587 let r: i64x2 = vprorq128(a, IMM8);
        transmute(simd_select_bitmask(k, r, src.as_i64x2()))
19589 }
19590}
19591
19592/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19593///
19594/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ror_epi64&expand=4723)
19595#[inline]
19596#[target_feature(enable = "avx512f,avx512vl")]
19597#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19598#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19599#[rustc_legacy_const_generics(2)]
19600pub fn _mm_maskz_ror_epi64<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
19601 unsafe {
19602 static_assert_uimm_bits!(IMM8, 8);
19603 let a: i64x2 = a.as_i64x2();
19604 let r: i64x2 = vprorq128(a, IMM8);
        transmute(simd_select_bitmask(k, r, i64x2::ZERO))
19606 }
19607}
19608
19609/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst.
19610///
19611/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_slli_epi32&expand=5310)
19612#[inline]
19613#[target_feature(enable = "avx512f")]
19614#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19615#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
19616#[rustc_legacy_const_generics(1)]
19617pub fn _mm512_slli_epi32<const IMM8: u32>(a: __m512i) -> __m512i {
19618 unsafe {
19619 static_assert_uimm_bits!(IMM8, 8);
19620 if IMM8 >= 32 {
19621 _mm512_setzero_si512()
19622 } else {
            transmute(simd_shl(a.as_u32x16(), u32x16::splat(IMM8)))
19624 }
19625 }
19626}
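
// Scalar sketch of the shift semantics implemented above: unlike a rotate, the
// shift count does not wrap, so any count of 32 or more clears the lane, which is
// exactly what the `IMM8 >= 32` branch encodes. Hypothetical reference only.
#[cfg(test)]
#[allow(dead_code)]
fn slli_epi32_scalar_reference(lanes: [u32; 16], imm8: u32) -> [u32; 16] {
    lanes.map(|x| if imm8 >= 32 { 0 } else { x << imm8 })
}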
19627
19628/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19629///
19630/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_slli_epi32&expand=5308)
19631#[inline]
19632#[target_feature(enable = "avx512f")]
19633#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19634#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
19635#[rustc_legacy_const_generics(3)]
19636pub fn _mm512_mask_slli_epi32<const IMM8: u32>(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
19637 unsafe {
19638 static_assert_uimm_bits!(IMM8, 8);
19639 let shf: u32x16 = if IMM8 >= 32 {
19640 u32x16::ZERO
19641 } else {
            simd_shl(a.as_u32x16(), u32x16::splat(IMM8))
        };
        transmute(simd_select_bitmask(k, shf, src.as_u32x16()))
19645 }
19646}
19647
19648/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19649///
19650/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_slli_epi32&expand=5309)
19651#[inline]
19652#[target_feature(enable = "avx512f")]
19653#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19654#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
19655#[rustc_legacy_const_generics(2)]
19656pub fn _mm512_maskz_slli_epi32<const IMM8: u32>(k: __mmask16, a: __m512i) -> __m512i {
19657 unsafe {
19658 static_assert_uimm_bits!(IMM8, 8);
19659 if IMM8 >= 32 {
19660 _mm512_setzero_si512()
19661 } else {
            let shf: u32x16 = simd_shl(a.as_u32x16(), u32x16::splat(IMM8));
            transmute(simd_select_bitmask(k, shf, u32x16::ZERO))
19664 }
19665 }
19666}
19667
19668/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19669///
19670/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_slli_epi32&expand=5305)
19671#[inline]
19672#[target_feature(enable = "avx512f,avx512vl")]
19673#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19674#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
19675#[rustc_legacy_const_generics(3)]
19676pub fn _mm256_mask_slli_epi32<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
19677 unsafe {
19678 static_assert_uimm_bits!(IMM8, 8);
19679 let r: u32x8 = if IMM8 >= 32 {
19680 u32x8::ZERO
19681 } else {
            simd_shl(a.as_u32x8(), u32x8::splat(IMM8))
        };
        transmute(simd_select_bitmask(k, r, src.as_u32x8()))
19685 }
19686}
19687
19688/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19689///
19690/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_slli_epi32&expand=5306)
19691#[inline]
19692#[target_feature(enable = "avx512f,avx512vl")]
19693#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19694#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
19695#[rustc_legacy_const_generics(2)]
19696pub fn _mm256_maskz_slli_epi32<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
19697 unsafe {
19698 static_assert_uimm_bits!(IMM8, 8);
19699 if IMM8 >= 32 {
19700 _mm256_setzero_si256()
19701 } else {
            let r: u32x8 = simd_shl(a.as_u32x8(), u32x8::splat(IMM8));
            transmute(simd_select_bitmask(k, r, u32x8::ZERO))
19704 }
19705 }
19706}
19707
19708/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19709///
19710/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_slli_epi32&expand=5302)
19711#[inline]
19712#[target_feature(enable = "avx512f,avx512vl")]
19713#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19714#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
19715#[rustc_legacy_const_generics(3)]
19716pub fn _mm_mask_slli_epi32<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19717 unsafe {
19718 static_assert_uimm_bits!(IMM8, 8);
19719 let r: u32x4 = if IMM8 >= 32 {
19720 u32x4::ZERO
19721 } else {
            simd_shl(a.as_u32x4(), u32x4::splat(IMM8))
        };
        transmute(simd_select_bitmask(k, r, src.as_u32x4()))
19725 }
19726}
19727
19728/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19729///
19730/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_slli_epi32&expand=5303)
19731#[inline]
19732#[target_feature(enable = "avx512f,avx512vl")]
19733#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19734#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
19735#[rustc_legacy_const_generics(2)]
19736pub fn _mm_maskz_slli_epi32<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
19737 unsafe {
19738 static_assert_uimm_bits!(IMM8, 8);
19739 if IMM8 >= 32 {
19740 _mm_setzero_si128()
19741 } else {
            let r: u32x4 = simd_shl(a.as_u32x4(), u32x4::splat(IMM8));
            transmute(simd_select_bitmask(k, r, u32x4::ZERO))
19744 }
19745 }
19746}
19747
19748/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst.
19749///
19750/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srli_epi32&expand=5522)
19751#[inline]
19752#[target_feature(enable = "avx512f")]
19753#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19754#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
19755#[rustc_legacy_const_generics(1)]
19756pub fn _mm512_srli_epi32<const IMM8: u32>(a: __m512i) -> __m512i {
19757 unsafe {
19758 static_assert_uimm_bits!(IMM8, 8);
19759 if IMM8 >= 32 {
19760 _mm512_setzero_si512()
19761 } else {
            transmute(simd_shr(a.as_u32x16(), u32x16::splat(IMM8)))
19763 }
19764 }
19765}
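
// Scalar sketch: `srli` is a logical (zero-filling) right shift, which is why the
// implementation above shifts the `u32x16` view of the vector rather than the
// signed lanes. Hypothetical reference only.
#[cfg(test)]
#[allow(dead_code)]
fn srli_epi32_scalar_reference(lanes: [i32; 16], imm8: u32) -> [i32; 16] {
    // Reinterpreting as unsigned keeps the shift from smearing the sign bit.
    lanes.map(|x| if imm8 >= 32 { 0 } else { ((x as u32) >> imm8) as i32 })
}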
19766
19767/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19768///
19769/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srli_epi32&expand=5520)
19770#[inline]
19771#[target_feature(enable = "avx512f")]
19772#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19773#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
19774#[rustc_legacy_const_generics(3)]
19775pub fn _mm512_mask_srli_epi32<const IMM8: u32>(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
19776 unsafe {
19777 static_assert_uimm_bits!(IMM8, 8);
19778 let shf: u32x16 = if IMM8 >= 32 {
19779 u32x16::ZERO
19780 } else {
            simd_shr(a.as_u32x16(), u32x16::splat(IMM8))
        };
        transmute(simd_select_bitmask(k, shf, src.as_u32x16()))
19784 }
19785}
19786
19787/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19788///
19789/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srli_epi32&expand=5521)
19790#[inline]
19791#[target_feature(enable = "avx512f")]
19792#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19793#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
19794#[rustc_legacy_const_generics(2)]
19795pub fn _mm512_maskz_srli_epi32<const IMM8: u32>(k: __mmask16, a: __m512i) -> __m512i {
19796 unsafe {
19797 static_assert_uimm_bits!(IMM8, 8);
19798 if IMM8 >= 32 {
19799 _mm512_setzero_si512()
19800 } else {
            let shf: u32x16 = simd_shr(a.as_u32x16(), u32x16::splat(IMM8));
            transmute(simd_select_bitmask(k, shf, u32x16::ZERO))
19803 }
19804 }
19805}
19806
19807/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19808///
19809/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srli_epi32&expand=5517)
19810#[inline]
19811#[target_feature(enable = "avx512f,avx512vl")]
19812#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19813#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
19814#[rustc_legacy_const_generics(3)]
19815pub fn _mm256_mask_srli_epi32<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
19816 unsafe {
19817 static_assert_uimm_bits!(IMM8, 8);
19818 let r: u32x8 = if IMM8 >= 32 {
19819 u32x8::ZERO
19820 } else {
            simd_shr(a.as_u32x8(), u32x8::splat(IMM8))
        };
        transmute(simd_select_bitmask(k, r, src.as_u32x8()))
19824 }
19825}
19826
19827/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19828///
19829/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srli_epi32&expand=5518)
19830#[inline]
19831#[target_feature(enable = "avx512f,avx512vl")]
19832#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19833#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
19834#[rustc_legacy_const_generics(2)]
19835pub fn _mm256_maskz_srli_epi32<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
19836 unsafe {
19837 static_assert_uimm_bits!(IMM8, 8);
19838 if IMM8 >= 32 {
19839 _mm256_setzero_si256()
19840 } else {
            let r: u32x8 = simd_shr(a.as_u32x8(), u32x8::splat(IMM8));
            transmute(simd_select_bitmask(k, r, u32x8::ZERO))
19843 }
19844 }
19845}
19846
19847/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19848///
19849/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srli_epi32&expand=5514)
19850#[inline]
19851#[target_feature(enable = "avx512f,avx512vl")]
19852#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19853#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
19854#[rustc_legacy_const_generics(3)]
19855pub fn _mm_mask_srli_epi32<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19856 unsafe {
19857 static_assert_uimm_bits!(IMM8, 8);
19858 let r: u32x4 = if IMM8 >= 32 {
19859 u32x4::ZERO
19860 } else {
            simd_shr(a.as_u32x4(), u32x4::splat(IMM8))
        };
        transmute(simd_select_bitmask(k, r, src.as_u32x4()))
19864 }
19865}
19866
19867/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19868///
19869/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srli_epi32&expand=5515)
19870#[inline]
19871#[target_feature(enable = "avx512f,avx512vl")]
19872#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19873#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
19874#[rustc_legacy_const_generics(2)]
19875pub fn _mm_maskz_srli_epi32<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
19876 unsafe {
19877 static_assert_uimm_bits!(IMM8, 8);
19878 if IMM8 >= 32 {
19879 _mm_setzero_si128()
19880 } else {
            let r: u32x4 = simd_shr(a.as_u32x4(), u32x4::splat(IMM8));
            transmute(simd_select_bitmask(k, r, u32x4::ZERO))
19883 }
19884 }
19885}
19886
19887/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst.
19888///
19889/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_slli_epi64&expand=5319)
19890#[inline]
19891#[target_feature(enable = "avx512f")]
19892#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19893#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
19894#[rustc_legacy_const_generics(1)]
19895pub fn _mm512_slli_epi64<const IMM8: u32>(a: __m512i) -> __m512i {
19896 unsafe {
19897 static_assert_uimm_bits!(IMM8, 8);
19898 if IMM8 >= 64 {
19899 _mm512_setzero_si512()
19900 } else {
            transmute(simd_shl(a.as_u64x8(), u64x8::splat(IMM8 as u64)))
19902 }
19903 }
19904}
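
// Hypothetical usage sketch (assumes a nightly toolchain, the
// `stdarch_x86_avx512` feature, and an AVX-512F target): the shift amount is a
// const generic, so inside this crate it can be supplied with a turbofish, while
// `#[rustc_legacy_const_generics]` lets external callers pass it as a trailing
// argument in the C-style form.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn slli_epi64_usage_sketch(a: __m512i) -> __m512i {
    // Shift every 64-bit lane left by 3; bits shifted out are discarded.
    _mm512_slli_epi64::<3>(a)
}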
19905
19906/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19907///
19908/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_slli_epi64&expand=5317)
19909#[inline]
19910#[target_feature(enable = "avx512f")]
19911#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19912#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
19913#[rustc_legacy_const_generics(3)]
19914pub fn _mm512_mask_slli_epi64<const IMM8: u32>(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
19915 unsafe {
19916 static_assert_uimm_bits!(IMM8, 8);
19917 let shf: u64x8 = if IMM8 >= 64 {
19918 u64x8::ZERO
19919 } else {
            simd_shl(a.as_u64x8(), u64x8::splat(IMM8 as u64))
        };
        transmute(simd_select_bitmask(k, shf, src.as_u64x8()))
19923 }
19924}
19925
19926/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19927///
19928/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_slli_epi64&expand=5318)
19929#[inline]
19930#[target_feature(enable = "avx512f")]
19931#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19932#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
19933#[rustc_legacy_const_generics(2)]
19934pub fn _mm512_maskz_slli_epi64<const IMM8: u32>(k: __mmask8, a: __m512i) -> __m512i {
19935 unsafe {
19936 static_assert_uimm_bits!(IMM8, 8);
19937 if IMM8 >= 64 {
19938 _mm512_setzero_si512()
19939 } else {
            let shf: u64x8 = simd_shl(a.as_u64x8(), u64x8::splat(IMM8 as u64));
            transmute(simd_select_bitmask(k, shf, u64x8::ZERO))
19942 }
19943 }
19944}
19945
19946/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19947///
19948/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_slli_epi64&expand=5314)
19949#[inline]
19950#[target_feature(enable = "avx512f,avx512vl")]
19951#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19952#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
19953#[rustc_legacy_const_generics(3)]
19954pub fn _mm256_mask_slli_epi64<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
19955 unsafe {
19956 static_assert_uimm_bits!(IMM8, 8);
19957 let r: u64x4 = if IMM8 >= 64 {
19958 u64x4::ZERO
19959 } else {
            simd_shl(a.as_u64x4(), u64x4::splat(IMM8 as u64))
        };
        transmute(simd_select_bitmask(k, r, src.as_u64x4()))
19963 }
19964}
19965
19966/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19967///
19968/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_slli_epi64&expand=5315)
19969#[inline]
19970#[target_feature(enable = "avx512f,avx512vl")]
19971#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19972#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
19973#[rustc_legacy_const_generics(2)]
19974pub fn _mm256_maskz_slli_epi64<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
19975 unsafe {
19976 static_assert_uimm_bits!(IMM8, 8);
19977 if IMM8 >= 64 {
19978 _mm256_setzero_si256()
19979 } else {
            let r: u64x4 = simd_shl(a.as_u64x4(), u64x4::splat(IMM8 as u64));
            transmute(simd_select_bitmask(k, r, u64x4::ZERO))
19982 }
19983 }
19984}
19985
19986/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19987///
19988/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_slli_epi64&expand=5311)
19989#[inline]
19990#[target_feature(enable = "avx512f,avx512vl")]
19991#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19992#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
19993#[rustc_legacy_const_generics(3)]
19994pub fn _mm_mask_slli_epi64<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19995 unsafe {
19996 static_assert_uimm_bits!(IMM8, 8);
19997 let r: u64x2 = if IMM8 >= 64 {
19998 u64x2::ZERO
19999 } else {
            simd_shl(a.as_u64x2(), u64x2::splat(IMM8 as u64))
        };
        transmute(simd_select_bitmask(k, r, src.as_u64x2()))
20003 }
20004}
20005
20006/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20007///
20008/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_slli_epi64&expand=5312)
20009#[inline]
20010#[target_feature(enable = "avx512f,avx512vl")]
20011#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20012#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
20013#[rustc_legacy_const_generics(2)]
20014pub fn _mm_maskz_slli_epi64<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
20015 unsafe {
20016 static_assert_uimm_bits!(IMM8, 8);
20017 if IMM8 >= 64 {
20018 _mm_setzero_si128()
20019 } else {
            let r: u64x2 = simd_shl(a.as_u64x2(), u64x2::splat(IMM8 as u64));
            transmute(simd_select_bitmask(k, r, u64x2::ZERO))
20022 }
20023 }
20024}
20025
20026/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst.
20027///
20028/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srli_epi64&expand=5531)
20029#[inline]
20030#[target_feature(enable = "avx512f")]
20031#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20032#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20033#[rustc_legacy_const_generics(1)]
20034pub fn _mm512_srli_epi64<const IMM8: u32>(a: __m512i) -> __m512i {
20035 unsafe {
20036 static_assert_uimm_bits!(IMM8, 8);
20037 if IMM8 >= 64 {
20038 _mm512_setzero_si512()
20039 } else {
            transmute(simd_shr(a.as_u64x8(), u64x8::splat(IMM8 as u64)))
20041 }
20042 }
20043}
20044
20045/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20046///
20047/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srli_epi64&expand=5529)
20048#[inline]
20049#[target_feature(enable = "avx512f")]
20050#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20051#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20052#[rustc_legacy_const_generics(3)]
20053pub fn _mm512_mask_srli_epi64<const IMM8: u32>(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
20054 unsafe {
20055 static_assert_uimm_bits!(IMM8, 8);
20056 let shf: u64x8 = if IMM8 >= 64 {
20057 u64x8::ZERO
20058 } else {
            simd_shr(a.as_u64x8(), u64x8::splat(IMM8 as u64))
        };
        transmute(simd_select_bitmask(k, shf, src.as_u64x8()))
20062 }
20063}
20064
20065/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20066///
20067/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srli_epi64&expand=5530)
20068#[inline]
20069#[target_feature(enable = "avx512f")]
20070#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20071#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20072#[rustc_legacy_const_generics(2)]
20073pub fn _mm512_maskz_srli_epi64<const IMM8: u32>(k: __mmask8, a: __m512i) -> __m512i {
20074 unsafe {
20075 static_assert_uimm_bits!(IMM8, 8);
20076 if IMM8 >= 64 {
20077 _mm512_setzero_si512()
20078 } else {
            let shf: u64x8 = simd_shr(a.as_u64x8(), u64x8::splat(IMM8 as u64));
            transmute(simd_select_bitmask(k, shf, u64x8::ZERO))
20081 }
20082 }
20083}
20084
20085/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20086///
20087/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srli_epi64&expand=5526)
20088#[inline]
20089#[target_feature(enable = "avx512f,avx512vl")]
20090#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20091#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20092#[rustc_legacy_const_generics(3)]
20093pub fn _mm256_mask_srli_epi64<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
20094 unsafe {
20095 static_assert_uimm_bits!(IMM8, 8);
20096 let r: u64x4 = if IMM8 >= 64 {
20097 u64x4::ZERO
20098 } else {
            simd_shr(a.as_u64x4(), u64x4::splat(IMM8 as u64))
        };
        transmute(simd_select_bitmask(k, r, src.as_u64x4()))
20102 }
20103}
20104
20105/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20106///
20107/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srli_epi64&expand=5527)
20108#[inline]
20109#[target_feature(enable = "avx512f,avx512vl")]
20110#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20111#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20112#[rustc_legacy_const_generics(2)]
20113pub fn _mm256_maskz_srli_epi64<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
20114 unsafe {
20115 static_assert_uimm_bits!(IMM8, 8);
20116 if IMM8 >= 64 {
20117 _mm256_setzero_si256()
20118 } else {
            let r: u64x4 = simd_shr(a.as_u64x4(), u64x4::splat(IMM8 as u64));
            transmute(simd_select_bitmask(k, r, u64x4::ZERO))
20121 }
20122 }
20123}
20124
20125/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20126///
20127/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srli_epi64&expand=5523)
20128#[inline]
20129#[target_feature(enable = "avx512f,avx512vl")]
20130#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20131#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20132#[rustc_legacy_const_generics(3)]
20133pub fn _mm_mask_srli_epi64<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
20134 unsafe {
20135 static_assert_uimm_bits!(IMM8, 8);
20136 let r: u64x2 = if IMM8 >= 64 {
20137 u64x2::ZERO
20138 } else {
            simd_shr(a.as_u64x2(), u64x2::splat(IMM8 as u64))
        };
        transmute(simd_select_bitmask(k, r, src.as_u64x2()))
20142 }
20143}
20144
20145/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20146///
20147/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srli_epi64&expand=5524)
20148#[inline]
20149#[target_feature(enable = "avx512f,avx512vl")]
20150#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20151#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20152#[rustc_legacy_const_generics(2)]
20153pub fn _mm_maskz_srli_epi64<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
20154 unsafe {
20155 static_assert_uimm_bits!(IMM8, 8);
20156 if IMM8 >= 64 {
20157 _mm_setzero_si128()
20158 } else {
            let r: u64x2 = simd_shr(a.as_u64x2(), u64x2::splat(IMM8 as u64));
            transmute(simd_select_bitmask(k, r, u64x2::ZERO))
20161 }
20162 }
20163}
20164
20165/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst.
20166///
20167/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sll_epi32&expand=5280)
20168#[inline]
20169#[target_feature(enable = "avx512f")]
20170#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20171#[cfg_attr(test, assert_instr(vpslld))]
20172pub fn _mm512_sll_epi32(a: __m512i, count: __m128i) -> __m512i {
    unsafe { transmute(vpslld(a.as_i32x16(), count.as_i32x4())) }
20174}
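
// Scalar sketch of the count-register form used above: unlike `slli`, the shift
// amount comes from the low 64 bits of a separate `__m128i`, the same count is
// applied to every lane, and a count of 32 or greater zeroes all lanes.
// Hypothetical reference only; the intrinsic itself compiles to `vpslld`.
#[cfg(test)]
#[allow(dead_code)]
fn sll_epi32_scalar_reference(lanes: [u32; 16], count: u64) -> [u32; 16] {
    lanes.map(|x| if count >= 32 { 0 } else { x << count })
}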
20175
20176/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20177///
20178/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sll_epi32&expand=5278)
20179#[inline]
20180#[target_feature(enable = "avx512f")]
20181#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20182#[cfg_attr(test, assert_instr(vpslld))]
20183pub fn _mm512_mask_sll_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20184 unsafe {
20185 let shf: i32x16 = _mm512_sll_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
20187 }
20188}
20189
20190/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20191///
20192/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sll_epi32&expand=5279)
20193#[inline]
20194#[target_feature(enable = "avx512f")]
20195#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20196#[cfg_attr(test, assert_instr(vpslld))]
20197pub fn _mm512_maskz_sll_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20198 unsafe {
20199 let shf: i32x16 = _mm512_sll_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
20201 }
20202}
20203
20204/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20205///
20206/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sll_epi32&expand=5275)
20207#[inline]
20208#[target_feature(enable = "avx512f,avx512vl")]
20209#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20210#[cfg_attr(test, assert_instr(vpslld))]
20211pub fn _mm256_mask_sll_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20212 unsafe {
20213 let shf: i32x8 = _mm256_sll_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
20215 }
20216}
20217
20218/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20219///
20220/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sll_epi32&expand=5276)
20221#[inline]
20222#[target_feature(enable = "avx512f,avx512vl")]
20223#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20224#[cfg_attr(test, assert_instr(vpslld))]
20225pub fn _mm256_maskz_sll_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20226 unsafe {
20227 let shf: i32x8 = _mm256_sll_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
20229 }
20230}
20231
20232/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20233///
20234/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sll_epi32&expand=5272)
20235#[inline]
20236#[target_feature(enable = "avx512f,avx512vl")]
20237#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20238#[cfg_attr(test, assert_instr(vpslld))]
20239pub fn _mm_mask_sll_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20240 unsafe {
20241 let shf: i32x4 = _mm_sll_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
20243 }
20244}
20245
20246/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20247///
20248/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sll_epi32&expand=5273)
20249#[inline]
20250#[target_feature(enable = "avx512f,avx512vl")]
20251#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20252#[cfg_attr(test, assert_instr(vpslld))]
20253pub fn _mm_maskz_sll_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20254 unsafe {
20255 let shf: i32x4 = _mm_sll_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
20257 }
20258}
20259
20260/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst.
20261///
20262/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srl_epi32&expand=5492)
20263#[inline]
20264#[target_feature(enable = "avx512f")]
20265#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20266#[cfg_attr(test, assert_instr(vpsrld))]
20267pub fn _mm512_srl_epi32(a: __m512i, count: __m128i) -> __m512i {
    unsafe { transmute(vpsrld(a.as_i32x16(), count.as_i32x4())) }
20269}
20270
20271/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20272///
20273/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srl_epi32&expand=5490)
20274#[inline]
20275#[target_feature(enable = "avx512f")]
20276#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20277#[cfg_attr(test, assert_instr(vpsrld))]
20278pub fn _mm512_mask_srl_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20279 unsafe {
20280 let shf: i32x16 = _mm512_srl_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
20282 }
20283}
20284
20285/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20286///
20287/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srl_epi32&expand=5491)
20288#[inline]
20289#[target_feature(enable = "avx512f")]
20290#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20291#[cfg_attr(test, assert_instr(vpsrld))]
20292pub fn _mm512_maskz_srl_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20293 unsafe {
20294 let shf: i32x16 = _mm512_srl_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
20296 }
20297}
20298
20299/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20300///
20301/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srl_epi32&expand=5487)
20302#[inline]
20303#[target_feature(enable = "avx512f,avx512vl")]
20304#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20305#[cfg_attr(test, assert_instr(vpsrld))]
20306pub fn _mm256_mask_srl_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20307 unsafe {
20308 let shf: i32x8 = _mm256_srl_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
20310 }
20311}
20312
20313/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20314///
20315/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srl_epi32&expand=5488)
20316#[inline]
20317#[target_feature(enable = "avx512f,avx512vl")]
20318#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20319#[cfg_attr(test, assert_instr(vpsrld))]
20320pub fn _mm256_maskz_srl_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20321 unsafe {
20322 let shf: i32x8 = _mm256_srl_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
20324 }
20325}
20326
20327/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20328///
20329/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srl_epi32&expand=5484)
20330#[inline]
20331#[target_feature(enable = "avx512f,avx512vl")]
20332#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20333#[cfg_attr(test, assert_instr(vpsrld))]
20334pub fn _mm_mask_srl_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20335 unsafe {
20336 let shf: i32x4 = _mm_srl_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
20338 }
20339}
20340
20341/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20342///
20343/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srl_epi32&expand=5485)
20344#[inline]
20345#[target_feature(enable = "avx512f,avx512vl")]
20346#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20347#[cfg_attr(test, assert_instr(vpsrld))]
20348pub fn _mm_maskz_srl_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20349 unsafe {
20350 let shf: i32x4 = _mm_srl_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
20352 }
20353}
20354
20355/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst.
20356///
20357/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sll_epi64&expand=5289)
20358#[inline]
20359#[target_feature(enable = "avx512f")]
20360#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20361#[cfg_attr(test, assert_instr(vpsllq))]
20362pub fn _mm512_sll_epi64(a: __m512i, count: __m128i) -> __m512i {
    unsafe { transmute(vpsllq(a.as_i64x8(), count.as_i64x2())) }
20364}
20365
20366/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20367///
20368/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sll_epi64&expand=5287)
20369#[inline]
20370#[target_feature(enable = "avx512f")]
20371#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20372#[cfg_attr(test, assert_instr(vpsllq))]
20373pub fn _mm512_mask_sll_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
20374 unsafe {
20375 let shf: i64x8 = _mm512_sll_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
20377 }
20378}
20379
20380/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20381///
20382/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sll_epi64&expand=5288)
20383#[inline]
20384#[target_feature(enable = "avx512f")]
20385#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20386#[cfg_attr(test, assert_instr(vpsllq))]
20387pub fn _mm512_maskz_sll_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
20388 unsafe {
20389 let shf: i64x8 = _mm512_sll_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
20391 }
20392}
20393
20394/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20395///
20396/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sll_epi64&expand=5284)
20397#[inline]
20398#[target_feature(enable = "avx512f,avx512vl")]
20399#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20400#[cfg_attr(test, assert_instr(vpsllq))]
20401pub fn _mm256_mask_sll_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20402 unsafe {
20403 let shf: i64x4 = _mm256_sll_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
20405 }
20406}
20407
20408/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20409///
20410/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sll_epi64&expand=5285)
20411#[inline]
20412#[target_feature(enable = "avx512f,avx512vl")]
20413#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20414#[cfg_attr(test, assert_instr(vpsllq))]
20415pub fn _mm256_maskz_sll_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20416 unsafe {
20417 let shf: i64x4 = _mm256_sll_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
20419 }
20420}
20421
20422/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20423///
20424/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sll_epi64&expand=5281)
20425#[inline]
20426#[target_feature(enable = "avx512f,avx512vl")]
20427#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20428#[cfg_attr(test, assert_instr(vpsllq))]
20429pub fn _mm_mask_sll_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20430 unsafe {
20431 let shf: i64x2 = _mm_sll_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
20433 }
20434}
20435
20436/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20437///
20438/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sll_epi64&expand=5282)
20439#[inline]
20440#[target_feature(enable = "avx512f,avx512vl")]
20441#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20442#[cfg_attr(test, assert_instr(vpsllq))]
20443pub fn _mm_maskz_sll_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20444 unsafe {
20445 let shf: i64x2 = _mm_sll_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
20447 }
20448}
20449
20450/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst.
20451///
20452/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srl_epi64&expand=5501)
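///
/// A minimal usage sketch (not part of Intel's documentation; assumes a nightly
/// toolchain with `stdarch_x86_avx512` and AVX-512F support):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn logical_shift_right(a: __m512i) -> __m512i {
///     // Zeros are shifted in from the left, so negative lanes do not keep
///     // their sign; compare with `_mm512_sra_epi64` for an arithmetic shift.
///     _mm512_srl_epi64(a, _mm_set_epi64x(0, 8))
/// }
/// ```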
20453#[inline]
20454#[target_feature(enable = "avx512f")]
20455#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20456#[cfg_attr(test, assert_instr(vpsrlq))]
20457pub fn _mm512_srl_epi64(a: __m512i, count: __m128i) -> __m512i {
    unsafe { transmute(vpsrlq(a.as_i64x8(), count.as_i64x2())) }
20459}
20460
20461/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20462///
20463/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srl_epi64&expand=5499)
20464#[inline]
20465#[target_feature(enable = "avx512f")]
20466#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20467#[cfg_attr(test, assert_instr(vpsrlq))]
20468pub fn _mm512_mask_srl_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
20469 unsafe {
20470 let shf: i64x8 = _mm512_srl_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
20472 }
20473}
20474
20475/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20476///
20477/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srl_epi64&expand=5500)
20478#[inline]
20479#[target_feature(enable = "avx512f")]
20480#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20481#[cfg_attr(test, assert_instr(vpsrlq))]
20482pub fn _mm512_maskz_srl_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
20483 unsafe {
20484 let shf: i64x8 = _mm512_srl_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
20486 }
20487}
20488
20489/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20490///
20491/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srl_epi64&expand=5496)
20492#[inline]
20493#[target_feature(enable = "avx512f,avx512vl")]
20494#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20495#[cfg_attr(test, assert_instr(vpsrlq))]
20496pub fn _mm256_mask_srl_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20497 unsafe {
20498 let shf: i64x4 = _mm256_srl_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
20500 }
20501}
20502
20503/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20504///
20505/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srl_epi64&expand=5497)
20506#[inline]
20507#[target_feature(enable = "avx512f,avx512vl")]
20508#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20509#[cfg_attr(test, assert_instr(vpsrlq))]
20510pub fn _mm256_maskz_srl_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20511 unsafe {
20512 let shf: i64x4 = _mm256_srl_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
20514 }
20515}
20516
20517/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20518///
20519/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srl_epi64&expand=5493)
20520#[inline]
20521#[target_feature(enable = "avx512f,avx512vl")]
20522#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20523#[cfg_attr(test, assert_instr(vpsrlq))]
20524pub fn _mm_mask_srl_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20525 unsafe {
20526 let shf: i64x2 = _mm_srl_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
20528 }
20529}
20530
20531/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20532///
20533/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srl_epi64&expand=5494)
20534#[inline]
20535#[target_feature(enable = "avx512f,avx512vl")]
20536#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20537#[cfg_attr(test, assert_instr(vpsrlq))]
20538pub fn _mm_maskz_srl_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20539 unsafe {
20540 let shf: i64x2 = _mm_srl_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
20542 }
20543}
20544
20545/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst.
20546///
20547/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sra_epi32&expand=5407)
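///
/// A minimal usage sketch (not part of Intel's documentation; assumes a nightly
/// toolchain with `stdarch_x86_avx512` and AVX-512F support):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn divide_by_four_rounding_down(a: __m512i) -> __m512i {
///     // Sign bits are shifted in, so -8 becomes -2 and 8 becomes 2.
///     _mm512_sra_epi32(a, _mm_set_epi32(0, 0, 0, 2))
/// }
/// ```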
20548#[inline]
20549#[target_feature(enable = "avx512f")]
20550#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20551#[cfg_attr(test, assert_instr(vpsrad))]
20552pub fn _mm512_sra_epi32(a: __m512i, count: __m128i) -> __m512i {
    unsafe { transmute(vpsrad(a.as_i32x16(), count.as_i32x4())) }
20554}
20555
20556/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20557///
20558/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sra_epi32&expand=5405)
20559#[inline]
20560#[target_feature(enable = "avx512f")]
20561#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20562#[cfg_attr(test, assert_instr(vpsrad))]
20563pub fn _mm512_mask_sra_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20564 unsafe {
20565 let shf: i32x16 = _mm512_sra_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
20567 }
20568}
20569
20570/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20571///
20572/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sra_epi32&expand=5406)
20573#[inline]
20574#[target_feature(enable = "avx512f")]
20575#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20576#[cfg_attr(test, assert_instr(vpsrad))]
20577pub fn _mm512_maskz_sra_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20578 unsafe {
20579 let shf: i32x16 = _mm512_sra_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
20581 }
20582}
20583
20584/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20585///
20586/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sra_epi32&expand=5402)
20587#[inline]
20588#[target_feature(enable = "avx512f,avx512vl")]
20589#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20590#[cfg_attr(test, assert_instr(vpsrad))]
20591pub fn _mm256_mask_sra_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20592 unsafe {
20593 let shf: i32x8 = _mm256_sra_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
20595 }
20596}
20597
20598/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20599///
20600/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sra_epi32&expand=5403)
20601#[inline]
20602#[target_feature(enable = "avx512f,avx512vl")]
20603#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20604#[cfg_attr(test, assert_instr(vpsrad))]
20605pub fn _mm256_maskz_sra_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20606 unsafe {
20607 let shf: i32x8 = _mm256_sra_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
20609 }
20610}
20611
20612/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20613///
20614/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sra_epi32&expand=5399)
20615#[inline]
20616#[target_feature(enable = "avx512f,avx512vl")]
20617#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20618#[cfg_attr(test, assert_instr(vpsrad))]
20619pub fn _mm_mask_sra_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20620 unsafe {
20621 let shf: i32x4 = _mm_sra_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
20623 }
20624}
20625
20626/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20627///
20628/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sra_epi32&expand=5400)
20629#[inline]
20630#[target_feature(enable = "avx512f,avx512vl")]
20631#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20632#[cfg_attr(test, assert_instr(vpsrad))]
20633pub fn _mm_maskz_sra_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20634 unsafe {
20635 let shf: i32x4 = _mm_sra_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
20637 }
20638}
20639
20640/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst.
20641///
20642/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sra_epi64&expand=5416)
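///
/// A minimal usage sketch (not part of Intel's documentation). A packed 64-bit
/// arithmetic shift has no SSE/AVX2 counterpart, so this intrinsic is only
/// available with AVX-512F; the sketch assumes a nightly toolchain with
/// `stdarch_x86_avx512`:
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn sign_extending_shift(a: __m512i) -> __m512i {
///     // Each 64-bit lane keeps its sign while being shifted right by 1.
///     _mm512_sra_epi64(a, _mm_set_epi64x(0, 1))
/// }
/// ```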
20643#[inline]
20644#[target_feature(enable = "avx512f")]
20645#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20646#[cfg_attr(test, assert_instr(vpsraq))]
20647pub fn _mm512_sra_epi64(a: __m512i, count: __m128i) -> __m512i {
    unsafe { transmute(vpsraq(a.as_i64x8(), count.as_i64x2())) }
20649}
20650
20651/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20652///
20653/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sra_epi64&expand=5414)
20654#[inline]
20655#[target_feature(enable = "avx512f")]
20656#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20657#[cfg_attr(test, assert_instr(vpsraq))]
20658pub fn _mm512_mask_sra_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
20659 unsafe {
20660 let shf: i64x8 = _mm512_sra_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
20662 }
20663}
20664
20665/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20666///
20667/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sra_epi64&expand=5415)
20668#[inline]
20669#[target_feature(enable = "avx512f")]
20670#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20671#[cfg_attr(test, assert_instr(vpsraq))]
20672pub fn _mm512_maskz_sra_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
20673 unsafe {
20674 let shf: i64x8 = _mm512_sra_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
20676 }
20677}
20678
20679/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst.
20680///
20681/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sra_epi64&expand=5413)
20682#[inline]
20683#[target_feature(enable = "avx512f,avx512vl")]
20684#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20685#[cfg_attr(test, assert_instr(vpsraq))]
20686pub fn _mm256_sra_epi64(a: __m256i, count: __m128i) -> __m256i {
    unsafe { transmute(vpsraq256(a.as_i64x4(), count.as_i64x2())) }
20688}
20689
20690/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20691///
20692/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sra_epi64&expand=5411)
20693#[inline]
20694#[target_feature(enable = "avx512f,avx512vl")]
20695#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20696#[cfg_attr(test, assert_instr(vpsraq))]
20697pub fn _mm256_mask_sra_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20698 unsafe {
20699 let shf: i64x4 = _mm256_sra_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
20701 }
20702}
20703
20704/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20705///
20706/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sra_epi64&expand=5412)
20707#[inline]
20708#[target_feature(enable = "avx512f,avx512vl")]
20709#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20710#[cfg_attr(test, assert_instr(vpsraq))]
20711pub fn _mm256_maskz_sra_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20712 unsafe {
20713 let shf: i64x4 = _mm256_sra_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
20715 }
20716}
20717
20718/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst.
20719///
20720/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sra_epi64&expand=5410)
20721#[inline]
20722#[target_feature(enable = "avx512f,avx512vl")]
20723#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20724#[cfg_attr(test, assert_instr(vpsraq))]
20725pub fn _mm_sra_epi64(a: __m128i, count: __m128i) -> __m128i {
    unsafe { transmute(vpsraq128(a.as_i64x2(), count.as_i64x2())) }
20727}
20728
20729/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20730///
20731/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sra_epi64&expand=5408)
20732#[inline]
20733#[target_feature(enable = "avx512f,avx512vl")]
20734#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20735#[cfg_attr(test, assert_instr(vpsraq))]
20736pub fn _mm_mask_sra_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20737 unsafe {
20738 let shf: i64x2 = _mm_sra_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
20740 }
20741}
20742
20743/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20744///
20745/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sra_epi64&expand=5409)
20746#[inline]
20747#[target_feature(enable = "avx512f,avx512vl")]
20748#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20749#[cfg_attr(test, assert_instr(vpsraq))]
20750pub fn _mm_maskz_sra_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20751 unsafe {
20752 let shf: i64x2 = _mm_sra_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
20754 }
20755}
20756
20757/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
20758///
20759/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srai_epi32&expand=5436)
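///
/// A minimal usage sketch showing the const-generic immediate (not part of
/// Intel's documentation; assumes a nightly toolchain with
/// `stdarch_x86_avx512` and AVX-512F support):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn arithmetic_shift_by_7(a: __m512i) -> __m512i {
///     // The shift count is a compile-time constant; counts above 31 are
///     // clamped to 31, which fills each lane with its sign bit.
///     _mm512_srai_epi32::<7>(a)
/// }
/// ```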
20760#[inline]
20761#[target_feature(enable = "avx512f")]
20762#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20763#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
20764#[rustc_legacy_const_generics(1)]
20765pub fn _mm512_srai_epi32<const IMM8: u32>(a: __m512i) -> __m512i {
20766 unsafe {
20767 static_assert_uimm_bits!(IMM8, 8);
        transmute(simd_shr(a.as_i32x16(), i32x16::splat(IMM8.min(31) as i32)))
20769 }
20770}
20771
20772/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20773///
20774/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srai_epi32&expand=5434)
20775#[inline]
20776#[target_feature(enable = "avx512f")]
20777#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20778#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
20779#[rustc_legacy_const_generics(3)]
20780pub fn _mm512_mask_srai_epi32<const IMM8: u32>(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
20781 unsafe {
20782 static_assert_uimm_bits!(IMM8, 8);
        let r: i32x16 = simd_shr(a.as_i32x16(), i32x16::splat(IMM8.min(31) as i32));
        transmute(simd_select_bitmask(k, r, src.as_i32x16()))
20785 }
20786}
20787
20788/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20789///
20790/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srai_epi32&expand=5435)
20791#[inline]
20792#[target_feature(enable = "avx512f")]
20793#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20794#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
20795#[rustc_legacy_const_generics(2)]
20796pub fn _mm512_maskz_srai_epi32<const IMM8: u32>(k: __mmask16, a: __m512i) -> __m512i {
20797 unsafe {
20798 static_assert_uimm_bits!(IMM8, 8);
        let r: i32x16 = simd_shr(a.as_i32x16(), i32x16::splat(IMM8.min(31) as i32));
        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
20801 }
20802}
20803
20804/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20805///
20806/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srai_epi32&expand=5431)
20807#[inline]
20808#[target_feature(enable = "avx512f,avx512vl")]
20809#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20810#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
20811#[rustc_legacy_const_generics(3)]
20812pub fn _mm256_mask_srai_epi32<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
20813 unsafe {
        let r: i32x8 = simd_shr(a.as_i32x8(), i32x8::splat(IMM8.min(31) as i32));
        transmute(simd_select_bitmask(k, r, src.as_i32x8()))
20816 }
20817}
20818
20819/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20820///
20821/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srai_epi32&expand=5432)
20822#[inline]
20823#[target_feature(enable = "avx512f,avx512vl")]
20824#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20825#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
20826#[rustc_legacy_const_generics(2)]
20827pub fn _mm256_maskz_srai_epi32<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
20828 unsafe {
        let r: i32x8 = simd_shr(a.as_i32x8(), i32x8::splat(IMM8.min(31) as i32));
        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
20831 }
20832}
20833
20834/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20835///
20836/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srai_epi32&expand=5428)
20837#[inline]
20838#[target_feature(enable = "avx512f,avx512vl")]
20839#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20840#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
20841#[rustc_legacy_const_generics(3)]
20842pub fn _mm_mask_srai_epi32<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
20843 unsafe {
        let r: i32x4 = simd_shr(a.as_i32x4(), i32x4::splat(IMM8.min(31) as i32));
        transmute(simd_select_bitmask(k, r, src.as_i32x4()))
20846 }
20847}
20848
20849/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20850///
20851/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srai_epi32&expand=5429)
20852#[inline]
20853#[target_feature(enable = "avx512f,avx512vl")]
20854#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20855#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
20856#[rustc_legacy_const_generics(2)]
20857pub fn _mm_maskz_srai_epi32<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
20858 unsafe {
        let r: i32x4 = simd_shr(a.as_i32x4(), i32x4::splat(IMM8.min(31) as i32));
        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
20861 }
20862}
20863
20864/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
20865///
20866/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srai_epi64&expand=5445)
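///
/// A minimal usage sketch (not part of Intel's documentation; assumes a nightly
/// toolchain with `stdarch_x86_avx512` and AVX-512F support):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn halve_signed_lanes(a: __m512i) -> __m512i {
///     // Shift counts of 64 or more are clamped to 63, which fills each
///     // lane with its sign bit (0 or -1).
///     _mm512_srai_epi64::<1>(a)
/// }
/// ```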
20867#[inline]
20868#[target_feature(enable = "avx512f")]
20869#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20870#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
20871#[rustc_legacy_const_generics(1)]
20872pub fn _mm512_srai_epi64<const IMM8: u32>(a: __m512i) -> __m512i {
20873 unsafe {
20874 static_assert_uimm_bits!(IMM8, 8);
        transmute(simd_shr(a.as_i64x8(), i64x8::splat(IMM8.min(63) as i64)))
20876 }
20877}
20878
20879/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20880///
20881/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srai_epi64&expand=5443)
20882#[inline]
20883#[target_feature(enable = "avx512f")]
20884#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20885#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
20886#[rustc_legacy_const_generics(3)]
20887pub fn _mm512_mask_srai_epi64<const IMM8: u32>(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
20888 unsafe {
20889 static_assert_uimm_bits!(IMM8, 8);
        let shf: i64x8 = simd_shr(a.as_i64x8(), i64x8::splat(IMM8.min(63) as i64));
        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
20892 }
20893}
20894
20895/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20896///
20897/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srai_epi64&expand=5444)
20898#[inline]
20899#[target_feature(enable = "avx512f")]
20900#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20901#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
20902#[rustc_legacy_const_generics(2)]
20903pub fn _mm512_maskz_srai_epi64<const IMM8: u32>(k: __mmask8, a: __m512i) -> __m512i {
20904 unsafe {
20905 static_assert_uimm_bits!(IMM8, 8);
        let shf: i64x8 = simd_shr(a.as_i64x8(), i64x8::splat(IMM8.min(63) as i64));
        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
20908 }
20909}
20910
20911/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
20912///
20913/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srai_epi64&expand=5442)
20914#[inline]
20915#[target_feature(enable = "avx512f,avx512vl")]
20916#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20917#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
20918#[rustc_legacy_const_generics(1)]
20919pub fn _mm256_srai_epi64<const IMM8: u32>(a: __m256i) -> __m256i {
20920 unsafe {
20921 static_assert_uimm_bits!(IMM8, 8);
        transmute(simd_shr(a.as_i64x4(), i64x4::splat(IMM8.min(63) as i64)))
20923 }
20924}
20925
20926/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20927///
20928/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srai_epi64&expand=5440)
20929#[inline]
20930#[target_feature(enable = "avx512f,avx512vl")]
20931#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20932#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
20933#[rustc_legacy_const_generics(3)]
20934pub fn _mm256_mask_srai_epi64<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
20935 unsafe {
20936 static_assert_uimm_bits!(IMM8, 8);
        let shf: i64x4 = simd_shr(a.as_i64x4(), i64x4::splat(IMM8.min(63) as i64));
        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
20939 }
20940}
20941
20942/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20943///
20944/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srai_epi64&expand=5441)
20945#[inline]
20946#[target_feature(enable = "avx512f,avx512vl")]
20947#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20948#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
20949#[rustc_legacy_const_generics(2)]
20950pub fn _mm256_maskz_srai_epi64<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
20951 unsafe {
20952 static_assert_uimm_bits!(IMM8, 8);
        let shf: i64x4 = simd_shr(a.as_i64x4(), i64x4::splat(IMM8.min(63) as i64));
        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
20955 }
20956}
20957
20958/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
20959///
20960/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srai_epi64&expand=5439)
20961#[inline]
20962#[target_feature(enable = "avx512f,avx512vl")]
20963#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20964#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
20965#[rustc_legacy_const_generics(1)]
20966pub fn _mm_srai_epi64<const IMM8: u32>(a: __m128i) -> __m128i {
20967 unsafe {
20968 static_assert_uimm_bits!(IMM8, 8);
        transmute(simd_shr(a.as_i64x2(), i64x2::splat(IMM8.min(63) as i64)))
20970 }
20971}
20972
20973/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20974///
20975/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srai_epi64&expand=5437)
20976#[inline]
20977#[target_feature(enable = "avx512f,avx512vl")]
20978#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20979#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
20980#[rustc_legacy_const_generics(3)]
20981pub fn _mm_mask_srai_epi64<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
20982 unsafe {
20983 static_assert_uimm_bits!(IMM8, 8);
        let shf: i64x2 = simd_shr(a.as_i64x2(), i64x2::splat(IMM8.min(63) as i64));
        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
20986 }
20987}
20988
20989/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20990///
20991/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srai_epi64&expand=5438)
20992#[inline]
20993#[target_feature(enable = "avx512f,avx512vl")]
20994#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20995#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
20996#[rustc_legacy_const_generics(2)]
20997pub fn _mm_maskz_srai_epi64<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
20998 unsafe {
20999 static_assert_uimm_bits!(IMM8, 8);
        let shf: i64x2 = simd_shr(a.as_i64x2(), i64x2::splat(IMM8.min(63) as i64));
        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
21002 }
21003}
21004
21005/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
21006///
21007/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srav_epi32&expand=5465)
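///
/// A minimal usage sketch (not part of Intel's documentation; assumes a nightly
/// toolchain with `stdarch_x86_avx512` and AVX-512F support):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn per_lane_shift(a: __m512i) -> __m512i {
///     // Lane i of `a` is shifted right (sign-extending) by lane i of `count`.
///     let count = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
///     _mm512_srav_epi32(a, count)
/// }
/// ```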
21008#[inline]
21009#[target_feature(enable = "avx512f")]
21010#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21011#[cfg_attr(test, assert_instr(vpsravd))]
21012pub fn _mm512_srav_epi32(a: __m512i, count: __m512i) -> __m512i {
    unsafe { transmute(vpsravd(a.as_i32x16(), count.as_i32x16())) }
21014}
21015
21016/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21017///
21018/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srav_epi32&expand=5463)
21019#[inline]
21020#[target_feature(enable = "avx512f")]
21021#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21022#[cfg_attr(test, assert_instr(vpsravd))]
21023pub fn _mm512_mask_srav_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
21024 unsafe {
21025 let shf: i32x16 = _mm512_srav_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
21027 }
21028}
21029
21030/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21031///
21032/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srav_epi32&expand=5464)
21033#[inline]
21034#[target_feature(enable = "avx512f")]
21035#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21036#[cfg_attr(test, assert_instr(vpsravd))]
21037pub fn _mm512_maskz_srav_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
21038 unsafe {
21039 let shf: i32x16 = _mm512_srav_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
21041 }
21042}
21043
21044/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21045///
21046/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srav_epi32&expand=5460)
21047#[inline]
21048#[target_feature(enable = "avx512f,avx512vl")]
21049#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21050#[cfg_attr(test, assert_instr(vpsravd))]
21051pub fn _mm256_mask_srav_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21052 unsafe {
21053 let shf: i32x8 = _mm256_srav_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
21055 }
21056}
21057
21058/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21059///
21060/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srav_epi32&expand=5461)
21061#[inline]
21062#[target_feature(enable = "avx512f,avx512vl")]
21063#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21064#[cfg_attr(test, assert_instr(vpsravd))]
21065pub fn _mm256_maskz_srav_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21066 unsafe {
21067 let shf: i32x8 = _mm256_srav_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
21069 }
21070}
21071
21072/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21073///
21074/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srav_epi32&expand=5457)
21075#[inline]
21076#[target_feature(enable = "avx512f,avx512vl")]
21077#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21078#[cfg_attr(test, assert_instr(vpsravd))]
21079pub fn _mm_mask_srav_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21080 unsafe {
21081 let shf: i32x4 = _mm_srav_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
21083 }
21084}
21085
21086/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21087///
21088/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srav_epi32&expand=5458)
21089#[inline]
21090#[target_feature(enable = "avx512f,avx512vl")]
21091#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21092#[cfg_attr(test, assert_instr(vpsravd))]
21093pub fn _mm_maskz_srav_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21094 unsafe {
21095 let shf: i32x4 = _mm_srav_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
21097 }
21098}
21099
21100/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
21101///
21102/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srav_epi64&expand=5474)
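///
/// A minimal usage sketch (not part of Intel's documentation; assumes a nightly
/// toolchain with `stdarch_x86_avx512` and AVX-512F support):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn per_lane_shift_64(a: __m512i) -> __m512i {
///     // Each 64-bit lane of `a` is shifted right (sign-extending) by the
///     // amount held in the corresponding lane of `count`.
///     let count = _mm512_setr_epi64(0, 8, 16, 24, 32, 40, 48, 56);
///     _mm512_srav_epi64(a, count)
/// }
/// ```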
21103#[inline]
21104#[target_feature(enable = "avx512f")]
21105#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21106#[cfg_attr(test, assert_instr(vpsravq))]
21107pub fn _mm512_srav_epi64(a: __m512i, count: __m512i) -> __m512i {
    unsafe { transmute(vpsravq(a.as_i64x8(), count.as_i64x8())) }
21109}
21110
21111/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21112///
21113/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srav_epi64&expand=5472)
21114#[inline]
21115#[target_feature(enable = "avx512f")]
21116#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21117#[cfg_attr(test, assert_instr(vpsravq))]
21118pub fn _mm512_mask_srav_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
21119 unsafe {
21120 let shf: i64x8 = _mm512_srav_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
21122 }
21123}
21124
21125/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21126///
21127/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srav_epi64&expand=5473)
21128#[inline]
21129#[target_feature(enable = "avx512f")]
21130#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21131#[cfg_attr(test, assert_instr(vpsravq))]
21132pub fn _mm512_maskz_srav_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
21133 unsafe {
21134 let shf: i64x8 = _mm512_srav_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
21136 }
21137}
21138
21139/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
21140///
21141/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srav_epi64&expand=5471)
21142#[inline]
21143#[target_feature(enable = "avx512f,avx512vl")]
21144#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21145#[cfg_attr(test, assert_instr(vpsravq))]
21146pub fn _mm256_srav_epi64(a: __m256i, count: __m256i) -> __m256i {
    unsafe { transmute(vpsravq256(a.as_i64x4(), count.as_i64x4())) }
21148}
21149
21150/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21151///
21152/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srav_epi64&expand=5469)
21153#[inline]
21154#[target_feature(enable = "avx512f,avx512vl")]
21155#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21156#[cfg_attr(test, assert_instr(vpsravq))]
21157pub fn _mm256_mask_srav_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21158 unsafe {
21159 let shf: i64x4 = _mm256_srav_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
21161 }
21162}
21163
21164/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21165///
21166/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srav_epi64&expand=5470)
21167#[inline]
21168#[target_feature(enable = "avx512f,avx512vl")]
21169#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21170#[cfg_attr(test, assert_instr(vpsravq))]
21171pub fn _mm256_maskz_srav_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21172 unsafe {
21173 let shf: i64x4 = _mm256_srav_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
21175 }
21176}
21177
21178/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
21179///
21180/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srav_epi64&expand=5468)
21181#[inline]
21182#[target_feature(enable = "avx512f,avx512vl")]
21183#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21184#[cfg_attr(test, assert_instr(vpsravq))]
21185pub fn _mm_srav_epi64(a: __m128i, count: __m128i) -> __m128i {
    unsafe { transmute(vpsravq128(a.as_i64x2(), count.as_i64x2())) }
21187}
21188
21189/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21190///
21191/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srav_epi64&expand=5466)
21192#[inline]
21193#[target_feature(enable = "avx512f,avx512vl")]
21194#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21195#[cfg_attr(test, assert_instr(vpsravq))]
21196pub fn _mm_mask_srav_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21197 unsafe {
21198 let shf: i64x2 = _mm_srav_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
21200 }
21201}
21202
21203/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21204///
21205/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srav_epi64&expand=5467)
21206#[inline]
21207#[target_feature(enable = "avx512f,avx512vl")]
21208#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21209#[cfg_attr(test, assert_instr(vpsravq))]
21210pub fn _mm_maskz_srav_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21211 unsafe {
21212 let shf: i64x2 = _mm_srav_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
21214 }
21215}
21216
21217/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
21218///
21219/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rolv_epi32&expand=4703)
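///
/// A minimal usage sketch (not part of Intel's documentation; assumes a nightly
/// toolchain with `stdarch_x86_avx512` and AVX-512F support):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn rotate_each_lane(a: __m512i) -> __m512i {
///     // Bits rotated out on the left re-enter on the right; only the low
///     // five bits of each count lane are used (rotation modulo 32).
///     let counts = _mm512_set1_epi32(8);
///     _mm512_rolv_epi32(a, counts)
/// }
/// ```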
21220#[inline]
21221#[target_feature(enable = "avx512f")]
21222#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21223#[cfg_attr(test, assert_instr(vprolvd))]
21224pub fn _mm512_rolv_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vprolvd(a.as_i32x16(), b.as_i32x16())) }
21226}
21227
21228/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21229///
21230/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rolv_epi32&expand=4701)
21231#[inline]
21232#[target_feature(enable = "avx512f")]
21233#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21234#[cfg_attr(test, assert_instr(vprolvd))]
21235pub fn _mm512_mask_rolv_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
21236 unsafe {
21237 let rol: i32x16 = _mm512_rolv_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, rol, src.as_i32x16()))
21239 }
21240}
21241
21242/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21243///
21244/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rolv_epi32&expand=4702)
21245#[inline]
21246#[target_feature(enable = "avx512f")]
21247#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21248#[cfg_attr(test, assert_instr(vprolvd))]
21249pub fn _mm512_maskz_rolv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
21250 unsafe {
21251 let rol: i32x16 = _mm512_rolv_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, rol, i32x16::ZERO))
21253 }
21254}
21255
21256/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
21257///
21258/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rolv_epi32&expand=4700)
21259#[inline]
21260#[target_feature(enable = "avx512f,avx512vl")]
21261#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21262#[cfg_attr(test, assert_instr(vprolvd))]
21263pub fn _mm256_rolv_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vprolvd256(a.as_i32x8(), b.as_i32x8())) }
21265}
21266
21267/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21268///
21269/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rolv_epi32&expand=4698)
21270#[inline]
21271#[target_feature(enable = "avx512f,avx512vl")]
21272#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21273#[cfg_attr(test, assert_instr(vprolvd))]
21274pub fn _mm256_mask_rolv_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
21275 unsafe {
21276 let rol: i32x8 = _mm256_rolv_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, rol, src.as_i32x8()))
21278 }
21279}
21280
21281/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21282///
21283/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rolv_epi32&expand=4699)
21284#[inline]
21285#[target_feature(enable = "avx512f,avx512vl")]
21286#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21287#[cfg_attr(test, assert_instr(vprolvd))]
21288pub fn _mm256_maskz_rolv_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
21289 unsafe {
21290 let rol: i32x8 = _mm256_rolv_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, rol, i32x8::ZERO))
21292 }
21293}
21294
21295/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
21296///
21297/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rolv_epi32&expand=4697)
21298#[inline]
21299#[target_feature(enable = "avx512f,avx512vl")]
21300#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21301#[cfg_attr(test, assert_instr(vprolvd))]
21302pub fn _mm_rolv_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vprolvd128(a.as_i32x4(), b.as_i32x4())) }
21304}
21305
21306/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21307///
21308/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rolv_epi32&expand=4695)
21309#[inline]
21310#[target_feature(enable = "avx512f,avx512vl")]
21311#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21312#[cfg_attr(test, assert_instr(vprolvd))]
21313pub fn _mm_mask_rolv_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
21314 unsafe {
21315 let rol: i32x4 = _mm_rolv_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, rol, src.as_i32x4()))
21317 }
21318}
21319
21320/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21321///
21322/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rolv_epi32&expand=4696)
21323#[inline]
21324#[target_feature(enable = "avx512f,avx512vl")]
21325#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21326#[cfg_attr(test, assert_instr(vprolvd))]
21327pub fn _mm_maskz_rolv_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
21328 unsafe {
21329 let rol: i32x4 = _mm_rolv_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, rol, i32x4::ZERO))
21331 }
21332}
21333
21334/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
21335///
21336/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rorv_epi32&expand=4739)
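///
/// A minimal usage sketch (not part of Intel's documentation; assumes a nightly
/// toolchain with `stdarch_x86_avx512` and AVX-512F support):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn rotate_right_per_lane(a: __m512i, counts: __m512i) -> __m512i {
///     // Rotating right by n is equivalent to rotating left by 32 - n.
///     _mm512_rorv_epi32(a, counts)
/// }
/// ```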
21337#[inline]
21338#[target_feature(enable = "avx512f")]
21339#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21340#[cfg_attr(test, assert_instr(vprorvd))]
21341pub fn _mm512_rorv_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vprorvd(a.as_i32x16(), b.as_i32x16())) }
21343}
21344
21345/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21346///
21347/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rorv_epi32&expand=4737)
21348#[inline]
21349#[target_feature(enable = "avx512f")]
21350#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21351#[cfg_attr(test, assert_instr(vprorvd))]
21352pub fn _mm512_mask_rorv_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
21353 unsafe {
21354 let ror: i32x16 = _mm512_rorv_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, ror, src.as_i32x16()))
21356 }
21357}
21358
21359/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21360///
21361/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rorv_epi32&expand=4738)
21362#[inline]
21363#[target_feature(enable = "avx512f")]
21364#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21365#[cfg_attr(test, assert_instr(vprorvd))]
21366pub fn _mm512_maskz_rorv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
21367 unsafe {
21368 let ror: i32x16 = _mm512_rorv_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, ror, i32x16::ZERO))
21370 }
21371}
21372
21373/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
21374///
21375/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rorv_epi32&expand=4736)
21376#[inline]
21377#[target_feature(enable = "avx512f,avx512vl")]
21378#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21379#[cfg_attr(test, assert_instr(vprorvd))]
21380pub fn _mm256_rorv_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vprorvd256(a.as_i32x8(), b.as_i32x8())) }
21382}
21383
21384/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21385///
21386/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rorv_epi32&expand=4734)
21387#[inline]
21388#[target_feature(enable = "avx512f,avx512vl")]
21389#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21390#[cfg_attr(test, assert_instr(vprorvd))]
21391pub fn _mm256_mask_rorv_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
21392 unsafe {
21393 let ror: i32x8 = _mm256_rorv_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, ror, src.as_i32x8()))
21395 }
21396}
21397
21398/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21399///
21400/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rorv_epi32&expand=4735)
21401#[inline]
21402#[target_feature(enable = "avx512f,avx512vl")]
21403#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21404#[cfg_attr(test, assert_instr(vprorvd))]
21405pub fn _mm256_maskz_rorv_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
21406 unsafe {
21407 let ror: i32x8 = _mm256_rorv_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, ror, i32x8::ZERO))
21409 }
21410}
21411
21412/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
21413///
21414/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rorv_epi32&expand=4733)
21415#[inline]
21416#[target_feature(enable = "avx512f,avx512vl")]
21417#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21418#[cfg_attr(test, assert_instr(vprorvd))]
21419pub fn _mm_rorv_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vprorvd128(a.as_i32x4(), b.as_i32x4())) }
21421}
21422
21423/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21424///
21425/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rorv_epi32&expand=4731)
21426#[inline]
21427#[target_feature(enable = "avx512f,avx512vl")]
21428#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21429#[cfg_attr(test, assert_instr(vprorvd))]
21430pub fn _mm_mask_rorv_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
21431 unsafe {
21432 let ror: i32x4 = _mm_rorv_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, ror, src.as_i32x4()))
21434 }
21435}
21436
21437/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21438///
21439/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rorv_epi32&expand=4732)
21440#[inline]
21441#[target_feature(enable = "avx512f,avx512vl")]
21442#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21443#[cfg_attr(test, assert_instr(vprorvd))]
21444pub fn _mm_maskz_rorv_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
21445 unsafe {
21446 let ror: i32x4 = _mm_rorv_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, ror, i32x4::ZERO))
21448 }
21449}
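
// Illustrative sketch (not part of the original source): how the variable rotate-right
// family behaves, assuming `avx512f`/`avx512vl` are available and using the standard
// set/setr intrinsics defined elsewhere in this crate. Rotate counts are taken modulo
// the element width, so a count of 32 leaves a 32-bit lane unchanged.
//
//     let a = _mm_set1_epi32(1);
//     let b = _mm_setr_epi32(1, 4, 8, 32);          // per-lane rotate counts (mod 32)
//     let r = _mm_rorv_epi32(a, b);                 // 0x8000_0000, 0x1000_0000, 0x0100_0000, 1
//     let z = _mm_maskz_rorv_epi32(0b0011, a, b);   // same, but lanes 2 and 3 are zeroed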
21450
21451/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
21452///
21453/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rolv_epi64&expand=4712)
21454#[inline]
21455#[target_feature(enable = "avx512f")]
21456#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21457#[cfg_attr(test, assert_instr(vprolvq))]
21458pub fn _mm512_rolv_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vprolvq(a.as_i64x8(), b.as_i64x8())) }
21460}
21461
21462/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21463///
21464/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rolv_epi64&expand=4710)
21465#[inline]
21466#[target_feature(enable = "avx512f")]
21467#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21468#[cfg_attr(test, assert_instr(vprolvq))]
21469pub fn _mm512_mask_rolv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
21470 unsafe {
21471 let rol: i64x8 = _mm512_rolv_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, rol, src.as_i64x8()))
21473 }
21474}
21475
21476/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21477///
21478/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rolv_epi64&expand=4711)
21479#[inline]
21480#[target_feature(enable = "avx512f")]
21481#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21482#[cfg_attr(test, assert_instr(vprolvq))]
21483pub fn _mm512_maskz_rolv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
21484 unsafe {
21485 let rol: i64x8 = _mm512_rolv_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, rol, i64x8::ZERO))
21487 }
21488}
21489
21490/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
21491///
21492/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rolv_epi64&expand=4709)
21493#[inline]
21494#[target_feature(enable = "avx512f,avx512vl")]
21495#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21496#[cfg_attr(test, assert_instr(vprolvq))]
21497pub fn _mm256_rolv_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vprolvq256(a.as_i64x4(), b.as_i64x4())) }
21499}
21500
21501/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21502///
21503/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rolv_epi64&expand=4707)
21504#[inline]
21505#[target_feature(enable = "avx512f,avx512vl")]
21506#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21507#[cfg_attr(test, assert_instr(vprolvq))]
21508pub fn _mm256_mask_rolv_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
21509 unsafe {
21510 let rol: i64x4 = _mm256_rolv_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, rol, src.as_i64x4()))
21512 }
21513}
21514
21515/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21516///
21517/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rolv_epi64&expand=4708)
21518#[inline]
21519#[target_feature(enable = "avx512f,avx512vl")]
21520#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21521#[cfg_attr(test, assert_instr(vprolvq))]
21522pub fn _mm256_maskz_rolv_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
21523 unsafe {
21524 let rol: i64x4 = _mm256_rolv_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, rol, i64x4::ZERO))
21526 }
21527}
21528
21529/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
21530///
21531/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rolv_epi64&expand=4706)
21532#[inline]
21533#[target_feature(enable = "avx512f,avx512vl")]
21534#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21535#[cfg_attr(test, assert_instr(vprolvq))]
21536pub fn _mm_rolv_epi64(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vprolvq128(a.as_i64x2(), b.as_i64x2())) }
21538}
21539
21540/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21541///
21542/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rolv_epi64&expand=4704)
21543#[inline]
21544#[target_feature(enable = "avx512f,avx512vl")]
21545#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21546#[cfg_attr(test, assert_instr(vprolvq))]
21547pub fn _mm_mask_rolv_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
21548 unsafe {
21549 let rol: i64x2 = _mm_rolv_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, rol, src.as_i64x2()))
21551 }
21552}
21553
21554/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21555///
21556/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rolv_epi64&expand=4705)
21557#[inline]
21558#[target_feature(enable = "avx512f,avx512vl")]
21559#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21560#[cfg_attr(test, assert_instr(vprolvq))]
21561pub fn _mm_maskz_rolv_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
21562 unsafe {
21563 let rol: i64x2 = _mm_rolv_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, rol, i64x2::ZERO))
21565 }
21566}
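
// Illustrative sketch (not part of the original source): per-lane rotate-left on 64-bit
// elements, with the writemask variant copying unselected lanes from `src`. Assumes
// `avx512f`/`avx512vl` support; the set intrinsics are the standard ones from this crate.
//
//     let a = _mm_set1_epi64x(1);
//     let b = _mm_set_epi64x(63, 1);                // lane 0 rotates by 1, lane 1 by 63
//     let r = _mm_rolv_epi64(a, b);                 // lanes: 2, 0x8000_0000_0000_0000
//     let m = _mm_mask_rolv_epi64(a, 0b01, a, b);   // lane 1 is copied from `src` (stays 1)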
21567
21568/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
21569///
21570/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rorv_epi64&expand=4748)
21571#[inline]
21572#[target_feature(enable = "avx512f")]
21573#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21574#[cfg_attr(test, assert_instr(vprorvq))]
21575pub fn _mm512_rorv_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vprorvq(a.as_i64x8(), b.as_i64x8())) }
21577}
21578
21579/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21580///
21581/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rorv_epi64&expand=4746)
21582#[inline]
21583#[target_feature(enable = "avx512f")]
21584#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21585#[cfg_attr(test, assert_instr(vprorvq))]
21586pub fn _mm512_mask_rorv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
21587 unsafe {
21588 let ror: i64x8 = _mm512_rorv_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, ror, src.as_i64x8()))
21590 }
21591}
21592
21593/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21594///
21595/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rorv_epi64&expand=4747)
21596#[inline]
21597#[target_feature(enable = "avx512f")]
21598#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21599#[cfg_attr(test, assert_instr(vprorvq))]
21600pub fn _mm512_maskz_rorv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
21601 unsafe {
21602 let ror: i64x8 = _mm512_rorv_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, ror, i64x8::ZERO))
21604 }
21605}
21606
21607/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
21608///
21609/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rorv_epi64&expand=4745)
21610#[inline]
21611#[target_feature(enable = "avx512f,avx512vl")]
21612#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21613#[cfg_attr(test, assert_instr(vprorvq))]
21614pub fn _mm256_rorv_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vprorvq256(a.as_i64x4(), b.as_i64x4())) }
21616}
21617
21618/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21619///
21620/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rorv_epi64&expand=4743)
21621#[inline]
21622#[target_feature(enable = "avx512f,avx512vl")]
21623#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21624#[cfg_attr(test, assert_instr(vprorvq))]
21625pub fn _mm256_mask_rorv_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
21626 unsafe {
21627 let ror: i64x4 = _mm256_rorv_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, ror, src.as_i64x4()))
21629 }
21630}
21631
21632/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21633///
21634/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rorv_epi64&expand=4744)
21635#[inline]
21636#[target_feature(enable = "avx512f,avx512vl")]
21637#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21638#[cfg_attr(test, assert_instr(vprorvq))]
21639pub fn _mm256_maskz_rorv_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
21640 unsafe {
21641 let ror: i64x4 = _mm256_rorv_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, ror, i64x4::ZERO))
21643 }
21644}
21645
21646/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
21647///
21648/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rorv_epi64&expand=4742)
21649#[inline]
21650#[target_feature(enable = "avx512f,avx512vl")]
21651#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21652#[cfg_attr(test, assert_instr(vprorvq))]
21653pub fn _mm_rorv_epi64(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vprorvq128(a.as_i64x2(), b.as_i64x2())) }
21655}
21656
21657/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21658///
21659/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rorv_epi64&expand=4740)
21660#[inline]
21661#[target_feature(enable = "avx512f,avx512vl")]
21662#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21663#[cfg_attr(test, assert_instr(vprorvq))]
21664pub fn _mm_mask_rorv_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
21665 unsafe {
21666 let ror: i64x2 = _mm_rorv_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, ror, src.as_i64x2()))
21668 }
21669}
21670
21671/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21672///
21673/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rorv_epi64&expand=4741)
21674#[inline]
21675#[target_feature(enable = "avx512f,avx512vl")]
21676#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21677#[cfg_attr(test, assert_instr(vprorvq))]
21678pub fn _mm_maskz_rorv_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
21679 unsafe {
21680 let ror: i64x2 = _mm_rorv_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, ror, i64x2::ZERO))
21682 }
21683}
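
// Illustrative note (not part of the original source): a variable rotate right by `n`
// is equivalent to a rotate left by `64 - n` (mod 64), so the `rorv`/`rolv` pairs can
// be used interchangeably. A quick sketch, assuming `avx512f` is available:
//
//     let a = _mm512_set1_epi64(0x00FF);
//     let r1 = _mm512_rorv_epi64(a, _mm512_set1_epi64(8));
//     let r2 = _mm512_rolv_epi64(a, _mm512_set1_epi64(56));
//     // r1 == r2: every lane is 0xFF00_0000_0000_0000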
21684
21685/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
21686///
21687/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sllv_epi32&expand=5342)
21688#[inline]
21689#[target_feature(enable = "avx512f")]
21690#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21691#[cfg_attr(test, assert_instr(vpsllvd))]
21692pub fn _mm512_sllv_epi32(a: __m512i, count: __m512i) -> __m512i {
    unsafe { transmute(vpsllvd(a.as_i32x16(), count.as_i32x16())) }
21694}
21695
21696/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21697///
21698/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sllv_epi32&expand=5340)
21699#[inline]
21700#[target_feature(enable = "avx512f")]
21701#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21702#[cfg_attr(test, assert_instr(vpsllvd))]
21703pub fn _mm512_mask_sllv_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
21704 unsafe {
21705 let shf: i32x16 = _mm512_sllv_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
21707 }
21708}
21709
21710/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21711///
21712/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sllv_epi32&expand=5341)
21713#[inline]
21714#[target_feature(enable = "avx512f")]
21715#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21716#[cfg_attr(test, assert_instr(vpsllvd))]
21717pub fn _mm512_maskz_sllv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
21718 unsafe {
21719 let shf: i32x16 = _mm512_sllv_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
21721 }
21722}
21723
21724/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21725///
21726/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sllv_epi32&expand=5337)
21727#[inline]
21728#[target_feature(enable = "avx512f,avx512vl")]
21729#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21730#[cfg_attr(test, assert_instr(vpsllvd))]
21731pub fn _mm256_mask_sllv_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21732 unsafe {
21733 let shf: i32x8 = _mm256_sllv_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
21735 }
21736}
21737
21738/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21739///
21740/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sllv_epi32&expand=5338)
21741#[inline]
21742#[target_feature(enable = "avx512f,avx512vl")]
21743#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21744#[cfg_attr(test, assert_instr(vpsllvd))]
21745pub fn _mm256_maskz_sllv_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21746 unsafe {
21747 let shf: i32x8 = _mm256_sllv_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
21749 }
21750}
21751
21752/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21753///
21754/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sllv_epi32&expand=5334)
21755#[inline]
21756#[target_feature(enable = "avx512f,avx512vl")]
21757#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21758#[cfg_attr(test, assert_instr(vpsllvd))]
21759pub fn _mm_mask_sllv_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21760 unsafe {
21761 let shf: i32x4 = _mm_sllv_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
21763 }
21764}
21765
21766/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21767///
21768/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sllv_epi32&expand=5335)
21769#[inline]
21770#[target_feature(enable = "avx512f,avx512vl")]
21771#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21772#[cfg_attr(test, assert_instr(vpsllvd))]
21773pub fn _mm_maskz_sllv_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21774 unsafe {
21775 let shf: i32x4 = _mm_sllv_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
21777 }
21778}
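
// Illustrative sketch (not part of the original source): variable left shift, which,
// unlike the rotate intrinsics above, does not wrap the count — any count of 32 or
// more produces zero. Assumes `avx512f`/`avx512vl` (the `_mm_sllv_epi32` base is AVX2).
//
//     let a = _mm_set1_epi32(1);
//     let c = _mm_setr_epi32(0, 4, 31, 32);
//     let r = _mm_sllv_epi32(a, c);                 // lanes: 1, 16, 0x8000_0000, 0
//     let z = _mm_maskz_sllv_epi32(0b0111, a, c);   // lane 3 is zeroed by the mask as well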
21779
21780/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
21781///
21782/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srlv_epi32&expand=5554)
21783#[inline]
21784#[target_feature(enable = "avx512f")]
21785#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21786#[cfg_attr(test, assert_instr(vpsrlvd))]
21787pub fn _mm512_srlv_epi32(a: __m512i, count: __m512i) -> __m512i {
    unsafe { transmute(vpsrlvd(a.as_i32x16(), count.as_i32x16())) }
21789}
21790
21791/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21792///
21793/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srlv_epi32&expand=5552)
21794#[inline]
21795#[target_feature(enable = "avx512f")]
21796#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21797#[cfg_attr(test, assert_instr(vpsrlvd))]
21798pub fn _mm512_mask_srlv_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
21799 unsafe {
21800 let shf: i32x16 = _mm512_srlv_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
21802 }
21803}
21804
21805/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21806///
21807/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srlv_epi32&expand=5553)
21808#[inline]
21809#[target_feature(enable = "avx512f")]
21810#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21811#[cfg_attr(test, assert_instr(vpsrlvd))]
21812pub fn _mm512_maskz_srlv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
21813 unsafe {
21814 let shf: i32x16 = _mm512_srlv_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
21816 }
21817}
21818
21819/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21820///
21821/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srlv_epi32&expand=5549)
21822#[inline]
21823#[target_feature(enable = "avx512f,avx512vl")]
21824#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21825#[cfg_attr(test, assert_instr(vpsrlvd))]
21826pub fn _mm256_mask_srlv_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21827 unsafe {
21828 let shf: i32x8 = _mm256_srlv_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
21830 }
21831}
21832
21833/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21834///
21835/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srlv_epi32&expand=5550)
21836#[inline]
21837#[target_feature(enable = "avx512f,avx512vl")]
21838#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21839#[cfg_attr(test, assert_instr(vpsrlvd))]
21840pub fn _mm256_maskz_srlv_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21841 unsafe {
21842 let shf: i32x8 = _mm256_srlv_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
21844 }
21845}
21846
21847/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21848///
21849/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srlv_epi32&expand=5546)
21850#[inline]
21851#[target_feature(enable = "avx512f,avx512vl")]
21852#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21853#[cfg_attr(test, assert_instr(vpsrlvd))]
21854pub fn _mm_mask_srlv_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21855 unsafe {
21856 let shf: i32x4 = _mm_srlv_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
21858 }
21859}
21860
21861/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21862///
21863/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srlv_epi32&expand=5547)
21864#[inline]
21865#[target_feature(enable = "avx512f,avx512vl")]
21866#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21867#[cfg_attr(test, assert_instr(vpsrlvd))]
21868pub fn _mm_maskz_srlv_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21869 unsafe {
21870 let shf: i32x4 = _mm_srlv_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
21872 }
21873}
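
// Illustrative sketch (not part of the original source): the `srlv` family is a
// *logical* right shift — zeros are shifted in regardless of the sign bit, and counts
// of 32 or more clear the lane. Assumes `avx512f`/`avx512vl` support.
//
//     let a = _mm_set1_epi32(-1);                   // 0xFFFF_FFFF in every lane
//     let c = _mm_setr_epi32(0, 1, 31, 32);
//     let r = _mm_srlv_epi32(a, c);                 // 0xFFFF_FFFF, 0x7FFF_FFFF, 1, 0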
21874
21875/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
21876///
21877/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sllv_epi64&expand=5351)
21878#[inline]
21879#[target_feature(enable = "avx512f")]
21880#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21881#[cfg_attr(test, assert_instr(vpsllvq))]
21882pub fn _mm512_sllv_epi64(a: __m512i, count: __m512i) -> __m512i {
    unsafe { transmute(vpsllvq(a.as_i64x8(), count.as_i64x8())) }
21884}
21885
21886/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21887///
21888/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sllv_epi64&expand=5349)
21889#[inline]
21890#[target_feature(enable = "avx512f")]
21891#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21892#[cfg_attr(test, assert_instr(vpsllvq))]
21893pub fn _mm512_mask_sllv_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
21894 unsafe {
21895 let shf: i64x8 = _mm512_sllv_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
21897 }
21898}
21899
21900/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21901///
21902/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sllv_epi64&expand=5350)
21903#[inline]
21904#[target_feature(enable = "avx512f")]
21905#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21906#[cfg_attr(test, assert_instr(vpsllvq))]
21907pub fn _mm512_maskz_sllv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
21908 unsafe {
21909 let shf: i64x8 = _mm512_sllv_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
21911 }
21912}
21913
21914/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21915///
21916/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sllv_epi64&expand=5346)
21917#[inline]
21918#[target_feature(enable = "avx512f,avx512vl")]
21919#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21920#[cfg_attr(test, assert_instr(vpsllvq))]
21921pub fn _mm256_mask_sllv_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21922 unsafe {
21923 let shf: i64x4 = _mm256_sllv_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
21925 }
21926}
21927
21928/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21929///
21930/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sllv_epi64&expand=5347)
21931#[inline]
21932#[target_feature(enable = "avx512f,avx512vl")]
21933#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21934#[cfg_attr(test, assert_instr(vpsllvq))]
21935pub fn _mm256_maskz_sllv_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21936 unsafe {
21937 let shf: i64x4 = _mm256_sllv_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
21939 }
21940}
21941
21942/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21943///
21944/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sllv_epi64&expand=5343)
21945#[inline]
21946#[target_feature(enable = "avx512f,avx512vl")]
21947#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21948#[cfg_attr(test, assert_instr(vpsllvq))]
21949pub fn _mm_mask_sllv_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21950 unsafe {
21951 let shf: i64x2 = _mm_sllv_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
21953 }
21954}
21955
21956/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21957///
21958/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sllv_epi64&expand=5344)
21959#[inline]
21960#[target_feature(enable = "avx512f,avx512vl")]
21961#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21962#[cfg_attr(test, assert_instr(vpsllvq))]
21963pub fn _mm_maskz_sllv_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21964 unsafe {
21965 let shf: i64x2 = _mm_sllv_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
21967 }
21968}
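
// Illustrative sketch (not part of the original source): the 64-bit left-shift variants
// behave like the 32-bit ones, except counts of 64 or more clear the lane. Assumes
// `avx512f`/`avx512vl` support.
//
//     let a = _mm_set1_epi64x(3);
//     let c = _mm_set_epi64x(64, 2);                // lane 0 shifts by 2, lane 1 by 64
//     let r = _mm_sllv_epi64(a, c);                 // lanes: 12, 0
//     let m = _mm_mask_sllv_epi64(a, 0b01, a, c);   // lane 1 copied from `src` (stays 3)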
21969
21970/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
21971///
21972/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srlv_epi64&expand=5563)
21973#[inline]
21974#[target_feature(enable = "avx512f")]
21975#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21976#[cfg_attr(test, assert_instr(vpsrlvq))]
21977pub fn _mm512_srlv_epi64(a: __m512i, count: __m512i) -> __m512i {
    unsafe { transmute(vpsrlvq(a.as_i64x8(), count.as_i64x8())) }
21979}
21980
21981/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21982///
21983/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srlv_epi64&expand=5561)
21984#[inline]
21985#[target_feature(enable = "avx512f")]
21986#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21987#[cfg_attr(test, assert_instr(vpsrlvq))]
21988pub fn _mm512_mask_srlv_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
21989 unsafe {
21990 let shf: i64x8 = _mm512_srlv_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
21992 }
21993}
21994
21995/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21996///
21997/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srlv_epi64&expand=5562)
21998#[inline]
21999#[target_feature(enable = "avx512f")]
22000#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22001#[cfg_attr(test, assert_instr(vpsrlvq))]
22002pub fn _mm512_maskz_srlv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
22003 unsafe {
22004 let shf: i64x8 = _mm512_srlv_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
22006 }
22007}
22008
22009/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22010///
22011/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srlv_epi64&expand=5558)
22012#[inline]
22013#[target_feature(enable = "avx512f,avx512vl")]
22014#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22015#[cfg_attr(test, assert_instr(vpsrlvq))]
22016pub fn _mm256_mask_srlv_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
22017 unsafe {
22018 let shf: i64x4 = _mm256_srlv_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
22020 }
22021}
22022
22023/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22024///
22025/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srlv_epi64&expand=5559)
22026#[inline]
22027#[target_feature(enable = "avx512f,avx512vl")]
22028#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22029#[cfg_attr(test, assert_instr(vpsrlvq))]
22030pub fn _mm256_maskz_srlv_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
22031 unsafe {
22032 let shf: i64x4 = _mm256_srlv_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
22034 }
22035}
22036
22037/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22038///
22039/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srlv_epi64&expand=5555)
22040#[inline]
22041#[target_feature(enable = "avx512f,avx512vl")]
22042#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22043#[cfg_attr(test, assert_instr(vpsrlvq))]
22044pub fn _mm_mask_srlv_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
22045 unsafe {
22046 let shf: i64x2 = _mm_srlv_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
22048 }
22049}
22050
22051/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22052///
22053/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srlv_epi64&expand=5556)
22054#[inline]
22055#[target_feature(enable = "avx512f,avx512vl")]
22056#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22057#[cfg_attr(test, assert_instr(vpsrlvq))]
22058pub fn _mm_maskz_srlv_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
22059 unsafe {
22060 let shf: i64x2 = _mm_srlv_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
22062 }
22063}
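
// Illustrative sketch (not part of the original source): 64-bit logical right shift with
// a zeromask, assuming `avx512f` is available.
//
//     let a = _mm512_set1_epi64(i64::MIN);           // 0x8000_0000_0000_0000 per lane
//     let c = _mm512_set1_epi64(63);
//     let r = _mm512_srlv_epi64(a, c);               // every lane becomes 1 (zeros shifted in)
//     let z = _mm512_maskz_srlv_epi64(0x0F, a, c);   // lanes 4..8 are zeroed by the mask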
22064
22065/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst.
22066///
22067/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permute_ps&expand=4170)
22068#[inline]
22069#[target_feature(enable = "avx512f")]
22070#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22071#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22072#[rustc_legacy_const_generics(1)]
22073pub fn _mm512_permute_ps<const MASK: i32>(a: __m512) -> __m512 {
22074 unsafe {
22075 static_assert_uimm_bits!(MASK, 8);
22076 simd_shuffle!(
22077 a,
22078 a,
22079 [
22080 MASK as u32 & 0b11,
22081 (MASK as u32 >> 2) & 0b11,
22082 ((MASK as u32 >> 4) & 0b11),
22083 ((MASK as u32 >> 6) & 0b11),
22084 (MASK as u32 & 0b11) + 4,
22085 ((MASK as u32 >> 2) & 0b11) + 4,
22086 ((MASK as u32 >> 4) & 0b11) + 4,
22087 ((MASK as u32 >> 6) & 0b11) + 4,
22088 (MASK as u32 & 0b11) + 8,
22089 ((MASK as u32 >> 2) & 0b11) + 8,
22090 ((MASK as u32 >> 4) & 0b11) + 8,
22091 ((MASK as u32 >> 6) & 0b11) + 8,
22092 (MASK as u32 & 0b11) + 12,
22093 ((MASK as u32 >> 2) & 0b11) + 12,
22094 ((MASK as u32 >> 4) & 0b11) + 12,
22095 ((MASK as u32 >> 6) & 0b11) + 12,
22096 ],
22097 )
22098 }
22099}
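
// Illustrative sketch (not part of the original source): each 2-bit field of `MASK`
// selects one of the four floats of a 128-bit lane, and the same pattern is applied to
// all four lanes. Assumes `avx512f` support.
//
//     let a = _mm512_setr_ps(
//         0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
//     );
//     let r = _mm512_permute_ps::<0b00_00_00_00>(a);
//     // each lane broadcasts its element 0: [0, 0, 0, 0, 4, 4, 4, 4, 8, 8, 8, 8, 12, 12, 12, 12]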
22100
22101/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22102///
22103/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permute_ps&expand=4168)
22104#[inline]
22105#[target_feature(enable = "avx512f")]
22106#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22107#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22108#[rustc_legacy_const_generics(3)]
22109pub fn _mm512_mask_permute_ps<const MASK: i32>(src: __m512, k: __mmask16, a: __m512) -> __m512 {
22110 unsafe {
22111 static_assert_uimm_bits!(MASK, 8);
22112 let r: __m512 = _mm512_permute_ps::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16()))
22114 }
22115}
22116
22117/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22118///
22119/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permute_ps&expand=4169)
22120#[inline]
22121#[target_feature(enable = "avx512f")]
22122#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22123#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22124#[rustc_legacy_const_generics(2)]
22125pub fn _mm512_maskz_permute_ps<const MASK: i32>(k: __mmask16, a: __m512) -> __m512 {
22126 unsafe {
22127 static_assert_uimm_bits!(MASK, 8);
22128 let r: __m512 = _mm512_permute_ps::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO))
22130 }
22131}
22132
22133/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22134///
22135/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permute_ps&expand=4165)
22136#[inline]
22137#[target_feature(enable = "avx512f,avx512vl")]
22138#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22139#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22140#[rustc_legacy_const_generics(3)]
22141pub fn _mm256_mask_permute_ps<const MASK: i32>(src: __m256, k: __mmask8, a: __m256) -> __m256 {
22142 unsafe {
22143 let r: __m256 = _mm256_permute_ps::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8()))
22145 }
22146}
22147
22148/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22149///
22150/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permute_ps&expand=4166)
22151#[inline]
22152#[target_feature(enable = "avx512f,avx512vl")]
22153#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22154#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22155#[rustc_legacy_const_generics(2)]
22156pub fn _mm256_maskz_permute_ps<const MASK: i32>(k: __mmask8, a: __m256) -> __m256 {
22157 unsafe {
22158 let r: __m256 = _mm256_permute_ps::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO))
22160 }
22161}
22162
22163/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22164///
22165/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permute_ps&expand=4162)
22166#[inline]
22167#[target_feature(enable = "avx512f,avx512vl")]
22168#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22169#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22170#[rustc_legacy_const_generics(3)]
22171pub fn _mm_mask_permute_ps<const MASK: i32>(src: __m128, k: __mmask8, a: __m128) -> __m128 {
22172 unsafe {
22173 let r: __m128 = _mm_permute_ps::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4()))
22175 }
22176}
22177
22178/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22179///
22180/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permute_ps&expand=4163)
22181#[inline]
22182#[target_feature(enable = "avx512f,avx512vl")]
22183#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22184#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22185#[rustc_legacy_const_generics(2)]
22186pub fn _mm_maskz_permute_ps<const MASK: i32>(k: __mmask8, a: __m128) -> __m128 {
22187 unsafe {
22188 let r: __m128 = _mm_permute_ps::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO))
22190 }
22191}
22192
22193/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst.
22194///
22195/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permute_pd&expand=4161)
22196#[inline]
22197#[target_feature(enable = "avx512f")]
22198#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22199#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01_10_01))]
22200#[rustc_legacy_const_generics(1)]
22201pub fn _mm512_permute_pd<const MASK: i32>(a: __m512d) -> __m512d {
22202 unsafe {
22203 static_assert_uimm_bits!(MASK, 8);
22204 simd_shuffle!(
22205 a,
22206 a,
22207 [
22208 MASK as u32 & 0b1,
22209 ((MASK as u32 >> 1) & 0b1),
22210 ((MASK as u32 >> 2) & 0b1) + 2,
22211 ((MASK as u32 >> 3) & 0b1) + 2,
22212 ((MASK as u32 >> 4) & 0b1) + 4,
22213 ((MASK as u32 >> 5) & 0b1) + 4,
22214 ((MASK as u32 >> 6) & 0b1) + 6,
22215 ((MASK as u32 >> 7) & 0b1) + 6,
22216 ],
22217 )
22218 }
22219}
22220
22221/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22222///
22223/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permute_pd&expand=4159)
22224#[inline]
22225#[target_feature(enable = "avx512f")]
22226#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22227#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01_10_01))]
22228#[rustc_legacy_const_generics(3)]
22229pub fn _mm512_mask_permute_pd<const MASK: i32>(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
22230 unsafe {
22231 static_assert_uimm_bits!(MASK, 8);
22232 let r: __m512d = _mm512_permute_pd::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
22234 }
22235}
22236
22237/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22238///
22239/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permute_pd&expand=4160)
22240#[inline]
22241#[target_feature(enable = "avx512f")]
22242#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22243#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01_10_01))]
22244#[rustc_legacy_const_generics(2)]
22245pub fn _mm512_maskz_permute_pd<const MASK: i32>(k: __mmask8, a: __m512d) -> __m512d {
22246 unsafe {
22247 static_assert_uimm_bits!(MASK, 8);
22248 let r: __m512d = _mm512_permute_pd::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
22250 }
22251}
22252
22253/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22254///
22255/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permute_pd&expand=4156)
22256#[inline]
22257#[target_feature(enable = "avx512f,avx512vl")]
22258#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22259#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01))]
22260#[rustc_legacy_const_generics(3)]
22261pub fn _mm256_mask_permute_pd<const MASK: i32>(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
22262 unsafe {
22263 static_assert_uimm_bits!(MASK, 4);
22264 let r: __m256d = _mm256_permute_pd::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
22266 }
22267}
22268
22269/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22270///
22271/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permute_pd&expand=4157)
22272#[inline]
22273#[target_feature(enable = "avx512f,avx512vl")]
22274#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22275#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01))]
22276#[rustc_legacy_const_generics(2)]
22277pub fn _mm256_maskz_permute_pd<const MASK: i32>(k: __mmask8, a: __m256d) -> __m256d {
22278 unsafe {
22279 static_assert_uimm_bits!(MASK, 4);
22280 let r: __m256d = _mm256_permute_pd::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO))
22282 }
22283}
22284
22285/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22286///
22287/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permute_pd&expand=4153)
22288#[inline]
22289#[target_feature(enable = "avx512f,avx512vl")]
22290#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22291#[cfg_attr(test, assert_instr(vshufpd, IMM2 = 0b01))]
22292#[rustc_legacy_const_generics(3)]
22293pub fn _mm_mask_permute_pd<const IMM2: i32>(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
22294 unsafe {
22295 static_assert_uimm_bits!(IMM2, 2);
22296 let r: __m128d = _mm_permute_pd::<IMM2>(a);
        transmute(simd_select_bitmask(k, r.as_f64x2(), src.as_f64x2()))
22298 }
22299}
22300
22301/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22302///
22303/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permute_pd&expand=4154)
22304#[inline]
22305#[target_feature(enable = "avx512f,avx512vl")]
22306#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22307#[cfg_attr(test, assert_instr(vshufpd, IMM2 = 0b01))]
22308#[rustc_legacy_const_generics(2)]
22309pub fn _mm_maskz_permute_pd<const IMM2: i32>(k: __mmask8, a: __m128d) -> __m128d {
22310 unsafe {
22311 static_assert_uimm_bits!(IMM2, 2);
22312 let r: __m128d = _mm_permute_pd::<IMM2>(a);
        transmute(simd_select_bitmask(k, r.as_f64x2(), f64x2::ZERO))
22314 }
22315}
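
// Illustrative sketch (not part of the original source): in the `permute_pd` family,
// output element `i` takes either the low or the high double of its own 128-bit lane,
// according to bit `i` of the immediate. Assumes `avx512f` support.
//
//     let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
//     let r = _mm512_permute_pd::<0b0101_0101>(a);  // swap within each pair: [1, 0, 3, 2, 5, 4, 7, 6]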
22316
22317/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst.
22318///
22319/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex_epi64&expand=4208)
22320#[inline]
22321#[target_feature(enable = "avx512f")]
22322#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22323#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
22324#[rustc_legacy_const_generics(1)]
22325pub fn _mm512_permutex_epi64<const MASK: i32>(a: __m512i) -> __m512i {
22326 unsafe {
22327 static_assert_uimm_bits!(MASK, 8);
22328 simd_shuffle!(
22329 a,
22330 a,
22331 [
22332 MASK as u32 & 0b11,
22333 (MASK as u32 >> 2) & 0b11,
22334 ((MASK as u32 >> 4) & 0b11),
22335 ((MASK as u32 >> 6) & 0b11),
22336 (MASK as u32 & 0b11) + 4,
22337 ((MASK as u32 >> 2) & 0b11) + 4,
22338 ((MASK as u32 >> 4) & 0b11) + 4,
22339 ((MASK as u32 >> 6) & 0b11) + 4,
22340 ],
22341 )
22342 }
22343}
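
// Illustrative sketch (not part of the original source): unlike `_mm512_permute_*`,
// `permutex` works on 256-bit lanes — each 2-bit field of `MASK` selects one of the
// four 64-bit elements of a 256-bit half, with the same control reused for both halves.
// Assumes `avx512f` support.
//
//     let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
//     let r = _mm512_permutex_epi64::<0b00_00_00_00>(a);
//     // each 256-bit half broadcasts its element 0: [0, 0, 0, 0, 4, 4, 4, 4]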
22344
22345/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22346///
22347/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex_epi64&expand=4206)
22348#[inline]
22349#[target_feature(enable = "avx512f")]
22350#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22351#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
22352#[rustc_legacy_const_generics(3)]
22353pub fn _mm512_mask_permutex_epi64<const MASK: i32>(
22354 src: __m512i,
22355 k: __mmask8,
22356 a: __m512i,
22357) -> __m512i {
22358 unsafe {
22359 static_assert_uimm_bits!(MASK, 8);
22360 let r: __m512i = _mm512_permutex_epi64::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8()))
22362 }
22363}
22364
22365/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22366///
22367/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex_epi64&expand=4207)
22368#[inline]
22369#[target_feature(enable = "avx512f")]
22370#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22371#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
22372#[rustc_legacy_const_generics(2)]
22373pub fn _mm512_maskz_permutex_epi64<const MASK: i32>(k: __mmask8, a: __m512i) -> __m512i {
22374 unsafe {
22375 static_assert_uimm_bits!(MASK, 8);
22376 let r: __m512i = _mm512_permutex_epi64::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO))
22378 }
22379}
22380
22381/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst.
22382///
22383/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex_epi64&expand=4205)
22384#[inline]
22385#[target_feature(enable = "avx512f,avx512vl")]
22386#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22387#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
22388#[rustc_legacy_const_generics(1)]
22389pub fn _mm256_permutex_epi64<const MASK: i32>(a: __m256i) -> __m256i {
22390 unsafe {
22391 static_assert_uimm_bits!(MASK, 8);
22392 simd_shuffle!(
22393 a,
22394 a,
22395 [
22396 MASK as u32 & 0b11,
22397 (MASK as u32 >> 2) & 0b11,
22398 ((MASK as u32 >> 4) & 0b11),
22399 ((MASK as u32 >> 6) & 0b11),
22400 ],
22401 )
22402 }
22403}
22404
22405/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22406///
22407/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex_epi64&expand=4203)
22408#[inline]
22409#[target_feature(enable = "avx512f,avx512vl")]
22410#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22411#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
22412#[rustc_legacy_const_generics(3)]
22413pub fn _mm256_mask_permutex_epi64<const MASK: i32>(
22414 src: __m256i,
22415 k: __mmask8,
22416 a: __m256i,
22417) -> __m256i {
22418 unsafe {
22419 static_assert_uimm_bits!(MASK, 8);
22420 let r: __m256i = _mm256_permutex_epi64::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4()))
22422 }
22423}
22424
22425/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22426///
22427/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex_epi64&expand=4204)
22428#[inline]
22429#[target_feature(enable = "avx512f,avx512vl")]
22430#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22431#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
22432#[rustc_legacy_const_generics(2)]
22433pub fn _mm256_maskz_permutex_epi64<const MASK: i32>(k: __mmask8, a: __m256i) -> __m256i {
22434 unsafe {
22435 static_assert_uimm_bits!(MASK, 8);
22436 let r: __m256i = _mm256_permutex_epi64::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO))
22438 }
22439}
22440
22441/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst.
22442///
22443/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex_pd&expand=4214)
22444#[inline]
22445#[target_feature(enable = "avx512f")]
22446#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22447#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
22448#[rustc_legacy_const_generics(1)]
22449pub fn _mm512_permutex_pd<const MASK: i32>(a: __m512d) -> __m512d {
22450 unsafe {
22451 static_assert_uimm_bits!(MASK, 8);
22452 simd_shuffle!(
22453 a,
22454 a,
22455 [
22456 MASK as u32 & 0b11,
22457 (MASK as u32 >> 2) & 0b11,
22458 ((MASK as u32 >> 4) & 0b11),
22459 ((MASK as u32 >> 6) & 0b11),
22460 (MASK as u32 & 0b11) + 4,
22461 ((MASK as u32 >> 2) & 0b11) + 4,
22462 ((MASK as u32 >> 4) & 0b11) + 4,
22463 ((MASK as u32 >> 6) & 0b11) + 4,
22464 ],
22465 )
22466 }
22467}
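
// Same 2-bit-per-lane control as the integer form above, applied to f64 lanes. A
// small illustrative sketch: MASK = 0 broadcasts element 0 of each 256-bit half.
//
//     let a = _mm512_setr_pd(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
//     let r = _mm512_permutex_pd::<0b00_00_00_00>(a);
//     // lanes of r: [0.0, 0.0, 0.0, 0.0, 4.0, 4.0, 4.0, 4.0]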
22468
22469/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22470///
22471/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex_pd&expand=4212)
22472#[inline]
22473#[target_feature(enable = "avx512f")]
22474#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22475#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
22476#[rustc_legacy_const_generics(3)]
22477pub fn _mm512_mask_permutex_pd<const MASK: i32>(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
22478 unsafe {
22479 let r: __m512d = _mm512_permutex_pd::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
22481 }
22482}
22483
22484/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22485///
22486/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex_pd&expand=4213)
22487#[inline]
22488#[target_feature(enable = "avx512f")]
22489#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22490#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
22491#[rustc_legacy_const_generics(2)]
22492pub fn _mm512_maskz_permutex_pd<const MASK: i32>(k: __mmask8, a: __m512d) -> __m512d {
22493 unsafe {
22494 let r: __m512d = _mm512_permutex_pd::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
22496 }
22497}
22498
22499/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst.
22500///
22501/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex_pd&expand=4211)
22502#[inline]
22503#[target_feature(enable = "avx512f,avx512vl")]
22504#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22505#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
22506#[rustc_legacy_const_generics(1)]
22507pub fn _mm256_permutex_pd<const MASK: i32>(a: __m256d) -> __m256d {
22508 unsafe {
22509 static_assert_uimm_bits!(MASK, 8);
22510 simd_shuffle!(
22511 a,
22512 a,
22513 [
22514 MASK as u32 & 0b11,
22515 (MASK as u32 >> 2) & 0b11,
22516 ((MASK as u32 >> 4) & 0b11),
22517 ((MASK as u32 >> 6) & 0b11),
22518 ],
22519 )
22520 }
22521}
22522
22523/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22524///
22525/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex_pd&expand=4209)
22526#[inline]
22527#[target_feature(enable = "avx512f,avx512vl")]
22528#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22529#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
22530#[rustc_legacy_const_generics(3)]
22531pub fn _mm256_mask_permutex_pd<const MASK: i32>(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
22532 unsafe {
22533 static_assert_uimm_bits!(MASK, 8);
22534 let r: __m256d = _mm256_permutex_pd::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
22536 }
22537}
22538
22539/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22540///
22541/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex_pd&expand=4210)
22542#[inline]
22543#[target_feature(enable = "avx512f,avx512vl")]
22544#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22545#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
22546#[rustc_legacy_const_generics(2)]
22547pub fn _mm256_maskz_permutex_pd<const MASK: i32>(k: __mmask8, a: __m256d) -> __m256d {
22548 unsafe {
22549 static_assert_uimm_bits!(MASK, 8);
22550 let r: __m256d = _mm256_permutex_pd::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO))
22552 }
22553}
22554
22555/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst. Note that this intrinsic shuffles across 128-bit lanes, unlike past intrinsics that use the permutevar name. This intrinsic is identical to _mm512_permutexvar_epi32, and it is recommended that you use that intrinsic name.
22556///
22557/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutevar_epi32&expand=4182)
22558#[inline]
22559#[target_feature(enable = "avx512f")]
22560#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22561#[cfg_attr(test, assert_instr(vperm))] //should be vpermd
22562pub fn _mm512_permutevar_epi32(idx: __m512i, a: __m512i) -> __m512i {
    unsafe { transmute(vpermd(a.as_i32x16(), idx.as_i32x16())) }
22564}
22565
22566/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Note that this intrinsic shuffles across 128-bit lanes, unlike past intrinsics that use the permutevar name. This intrinsic is identical to _mm512_mask_permutexvar_epi32, and it is recommended that you use that intrinsic name.
22567///
22568/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutevar_epi32&expand=4181)
22569#[inline]
22570#[target_feature(enable = "avx512f")]
22571#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22572#[cfg_attr(test, assert_instr(vpermd))]
22573pub fn _mm512_mask_permutevar_epi32(
22574 src: __m512i,
22575 k: __mmask16,
22576 idx: __m512i,
22577 a: __m512i,
22578) -> __m512i {
22579 unsafe {
22580 let permute: i32x16 = _mm512_permutevar_epi32(idx, a).as_i32x16();
        transmute(simd_select_bitmask(k, permute, src.as_i32x16()))
22582 }
22583}
22584
22585/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst.
22586///
22587/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutevar_ps&expand=4200)
22588#[inline]
22589#[target_feature(enable = "avx512f")]
22590#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22591#[cfg_attr(test, assert_instr(vpermilps))]
22592pub fn _mm512_permutevar_ps(a: __m512, b: __m512i) -> __m512 {
    unsafe { transmute(vpermilps(a.as_f32x16(), b.as_i32x16())) }
22594}
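
// Unlike the permutex* family, the control here is a vector: bits 1:0 of each 32-bit
// element of `b` pick one of the four floats in that element's own 128-bit lane.
// Illustrative sketch only:
//
//     let a = _mm512_setr_ps(
//         0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0,
//         8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
//     );
//     let b = _mm512_set1_epi32(3); // every element selects index 3 of its 128-bit lane
//     let r = _mm512_permutevar_ps(a, b);
//     // lanes of r: [3, 3, 3, 3, 7, 7, 7, 7, 11, 11, 11, 11, 15, 15, 15, 15]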
22595
22596/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22597///
22598/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutevar_ps&expand=4198)
22599#[inline]
22600#[target_feature(enable = "avx512f")]
22601#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22602#[cfg_attr(test, assert_instr(vpermilps))]
22603pub fn _mm512_mask_permutevar_ps(src: __m512, k: __mmask16, a: __m512, b: __m512i) -> __m512 {
22604 unsafe {
22605 let permute: f32x16 = _mm512_permutevar_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, permute, src.as_f32x16()))
22607 }
22608}
22609
22610/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22611///
22612/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutevar_ps&expand=4199)
22613#[inline]
22614#[target_feature(enable = "avx512f")]
22615#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22616#[cfg_attr(test, assert_instr(vpermilps))]
22617pub fn _mm512_maskz_permutevar_ps(k: __mmask16, a: __m512, b: __m512i) -> __m512 {
22618 unsafe {
22619 let permute: f32x16 = _mm512_permutevar_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, permute, f32x16::ZERO))
22621 }
22622}
22623
22624/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22625///
22626/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm256_mask_permutevar_ps&expand=4195)
22627#[inline]
22628#[target_feature(enable = "avx512f,avx512vl")]
22629#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22630#[cfg_attr(test, assert_instr(vpermilps))]
22631pub fn _mm256_mask_permutevar_ps(src: __m256, k: __mmask8, a: __m256, b: __m256i) -> __m256 {
22632 unsafe {
22633 let permute: f32x8 = _mm256_permutevar_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, permute, src.as_f32x8()))
22635 }
22636}
22637
22638/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22639///
22640/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutevar_ps&expand=4196)
22641#[inline]
22642#[target_feature(enable = "avx512f,avx512vl")]
22643#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22644#[cfg_attr(test, assert_instr(vpermilps))]
22645pub fn _mm256_maskz_permutevar_ps(k: __mmask8, a: __m256, b: __m256i) -> __m256 {
22646 unsafe {
22647 let permute: f32x8 = _mm256_permutevar_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, permute, f32x8::ZERO))
22649 }
22650}
22651
22652/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22653///
22654/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutevar_ps&expand=4192)
22655#[inline]
22656#[target_feature(enable = "avx512f,avx512vl")]
22657#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22658#[cfg_attr(test, assert_instr(vpermilps))]
22659pub fn _mm_mask_permutevar_ps(src: __m128, k: __mmask8, a: __m128, b: __m128i) -> __m128 {
22660 unsafe {
22661 let permute: f32x4 = _mm_permutevar_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, permute, src.as_f32x4()))
22663 }
22664}
22665
22666/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22667///
22668/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutevar_ps&expand=4193)
22669#[inline]
22670#[target_feature(enable = "avx512f,avx512vl")]
22671#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22672#[cfg_attr(test, assert_instr(vpermilps))]
22673pub fn _mm_maskz_permutevar_ps(k: __mmask8, a: __m128, b: __m128i) -> __m128 {
22674 unsafe {
22675 let permute: f32x4 = _mm_permutevar_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, permute, f32x4::ZERO))
22677 }
22678}
22679
22680/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst.
22681///
22682/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutevar_pd&expand=4191)
22683#[inline]
22684#[target_feature(enable = "avx512f")]
22685#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22686#[cfg_attr(test, assert_instr(vpermilpd))]
22687pub fn _mm512_permutevar_pd(a: __m512d, b: __m512i) -> __m512d {
    unsafe { transmute(vpermilpd(a.as_f64x8(), b.as_i64x8())) }
22689}
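
// Note the selector subtlety for the f64 form (matching `vpermilpd`): it is bit 1 of
// each 64-bit control element, not bit 0, that chooses between the low and high
// element of the corresponding 128-bit pair. Illustrative sketch only:
//
//     let a = _mm512_setr_pd(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
//     let b = _mm512_set1_epi64(2); // bit 1 set: pick the high element of each pair
//     let r = _mm512_permutevar_pd(a, b);
//     // lanes of r: [1.0, 1.0, 3.0, 3.0, 5.0, 5.0, 7.0, 7.0]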
22690
22691/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22692///
22693/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutevar_pd&expand=4189)
22694#[inline]
22695#[target_feature(enable = "avx512f")]
22696#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22697#[cfg_attr(test, assert_instr(vpermilpd))]
22698pub fn _mm512_mask_permutevar_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512i) -> __m512d {
22699 unsafe {
22700 let permute: f64x8 = _mm512_permutevar_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, permute, src.as_f64x8()))
22702 }
22703}
22704
22705/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22706///
22707/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutevar_pd&expand=4190)
22708#[inline]
22709#[target_feature(enable = "avx512f")]
22710#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22711#[cfg_attr(test, assert_instr(vpermilpd))]
22712pub fn _mm512_maskz_permutevar_pd(k: __mmask8, a: __m512d, b: __m512i) -> __m512d {
22713 unsafe {
22714 let permute: f64x8 = _mm512_permutevar_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, permute, f64x8::ZERO))
22716 }
22717}
22718
22719/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22720///
22721/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutevar_pd&expand=4186)
22722#[inline]
22723#[target_feature(enable = "avx512f,avx512vl")]
22724#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22725#[cfg_attr(test, assert_instr(vpermilpd))]
22726pub fn _mm256_mask_permutevar_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256i) -> __m256d {
22727 unsafe {
22728 let permute: f64x4 = _mm256_permutevar_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, permute, src.as_f64x4()))
22730 }
22731}
22732
22733/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22734///
22735/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutevar_pd&expand=4187)
22736#[inline]
22737#[target_feature(enable = "avx512f,avx512vl")]
22738#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22739#[cfg_attr(test, assert_instr(vpermilpd))]
22740pub fn _mm256_maskz_permutevar_pd(k: __mmask8, a: __m256d, b: __m256i) -> __m256d {
22741 unsafe {
22742 let permute: f64x4 = _mm256_permutevar_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, permute, f64x4::ZERO))
22744 }
22745}
22746
22747/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22748///
22749/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutevar_pd&expand=4183)
22750#[inline]
22751#[target_feature(enable = "avx512f,avx512vl")]
22752#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22753#[cfg_attr(test, assert_instr(vpermilpd))]
22754pub fn _mm_mask_permutevar_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128i) -> __m128d {
22755 unsafe {
22756 let permute: f64x2 = _mm_permutevar_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, permute, src.as_f64x2()))
22758 }
22759}
22760
22761/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22762///
22763/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutevar_pd&expand=4184)
22764#[inline]
22765#[target_feature(enable = "avx512f,avx512vl")]
22766#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22767#[cfg_attr(test, assert_instr(vpermilpd))]
22768pub fn _mm_maskz_permutevar_pd(k: __mmask8, a: __m128d, b: __m128i) -> __m128d {
22769 unsafe {
22770 let permute: f64x2 = _mm_permutevar_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, permute, f64x2::ZERO))
22772 }
22773}
22774
22775/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
22776///
22777/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_epi32&expand=4301)
22778#[inline]
22779#[target_feature(enable = "avx512f")]
22780#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22781#[cfg_attr(test, assert_instr(vperm))] //should be vpermd
22782pub fn _mm512_permutexvar_epi32(idx: __m512i, a: __m512i) -> __m512i {
    unsafe { transmute(vpermd(a.as_i32x16(), idx.as_i32x16())) }
22784}
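
// Full cross-lane permute driven by a vector of indices (only the low 4 bits of each
// index element are used). A common use is reversing a vector; illustrative sketch
// only:
//
//     let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
//     let idx = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
//     let r = _mm512_permutexvar_epi32(idx, a);
//     // lanes of r: [15, 14, 13, 12, ..., 1, 0]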
22785
22786/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22787///
22788/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_epi32&expand=4299)
22789#[inline]
22790#[target_feature(enable = "avx512f")]
22791#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22792#[cfg_attr(test, assert_instr(vpermd))]
22793pub fn _mm512_mask_permutexvar_epi32(
22794 src: __m512i,
22795 k: __mmask16,
22796 idx: __m512i,
22797 a: __m512i,
22798) -> __m512i {
22799 unsafe {
22800 let permute: i32x16 = _mm512_permutexvar_epi32(idx, a).as_i32x16();
        transmute(simd_select_bitmask(k, permute, src.as_i32x16()))
22802 }
22803}
22804
22805/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22806///
22807/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_epi32&expand=4300)
22808#[inline]
22809#[target_feature(enable = "avx512f")]
22810#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22811#[cfg_attr(test, assert_instr(vpermd))]
22812pub fn _mm512_maskz_permutexvar_epi32(k: __mmask16, idx: __m512i, a: __m512i) -> __m512i {
22813 unsafe {
22814 let permute: i32x16 = _mm512_permutexvar_epi32(idx, a).as_i32x16();
        transmute(simd_select_bitmask(k, permute, i32x16::ZERO))
22816 }
22817}
22818
22819/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
22820///
22821/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_epi32&expand=4298)
22822#[inline]
22823#[target_feature(enable = "avx512f,avx512vl")]
22824#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22825#[cfg_attr(test, assert_instr(vperm))] //should be vpermd
22826pub fn _mm256_permutexvar_epi32(idx: __m256i, a: __m256i) -> __m256i {
    _mm256_permutevar8x32_epi32(a, idx) // llvm uses llvm.x86.avx2.permd
22828}
22829
22830/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22831///
22832/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_epi32&expand=4296)
22833#[inline]
22834#[target_feature(enable = "avx512f,avx512vl")]
22835#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22836#[cfg_attr(test, assert_instr(vpermd))]
22837pub fn _mm256_mask_permutexvar_epi32(
22838 src: __m256i,
22839 k: __mmask8,
22840 idx: __m256i,
22841 a: __m256i,
22842) -> __m256i {
22843 unsafe {
22844 let permute: i32x8 = _mm256_permutexvar_epi32(idx, a).as_i32x8();
        transmute(simd_select_bitmask(k, permute, src.as_i32x8()))
22846 }
22847}
22848
22849/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22850///
22851/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_epi32&expand=4297)
22852#[inline]
22853#[target_feature(enable = "avx512f,avx512vl")]
22854#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22855#[cfg_attr(test, assert_instr(vpermd))]
22856pub fn _mm256_maskz_permutexvar_epi32(k: __mmask8, idx: __m256i, a: __m256i) -> __m256i {
22857 unsafe {
22858 let permute: i32x8 = _mm256_permutexvar_epi32(idx, a).as_i32x8();
        transmute(simd_select_bitmask(k, permute, i32x8::ZERO))
22860 }
22861}
22862
22863/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
22864///
22865/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_epi64&expand=4307)
22866#[inline]
22867#[target_feature(enable = "avx512f")]
22868#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22869#[cfg_attr(test, assert_instr(vperm))] //should be vpermq
22870pub fn _mm512_permutexvar_epi64(idx: __m512i, a: __m512i) -> __m512i {
    unsafe { transmute(vpermq(a.as_i64x8(), idx.as_i64x8())) }
22872}
22873
22874/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22875///
22876/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_epi64&expand=4305)
22877#[inline]
22878#[target_feature(enable = "avx512f")]
22879#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22880#[cfg_attr(test, assert_instr(vpermq))]
22881pub fn _mm512_mask_permutexvar_epi64(
22882 src: __m512i,
22883 k: __mmask8,
22884 idx: __m512i,
22885 a: __m512i,
22886) -> __m512i {
22887 unsafe {
22888 let permute: i64x8 = _mm512_permutexvar_epi64(idx, a).as_i64x8();
        transmute(simd_select_bitmask(k, permute, src.as_i64x8()))
22890 }
22891}
22892
22893/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22894///
22895/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_epi64&expand=4306)
22896#[inline]
22897#[target_feature(enable = "avx512f")]
22898#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22899#[cfg_attr(test, assert_instr(vpermq))]
22900pub fn _mm512_maskz_permutexvar_epi64(k: __mmask8, idx: __m512i, a: __m512i) -> __m512i {
22901 unsafe {
22902 let permute: i64x8 = _mm512_permutexvar_epi64(idx, a).as_i64x8();
        transmute(simd_select_bitmask(k, permute, i64x8::ZERO))
22904 }
22905}
22906
22907/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
22908///
22909/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_epi64&expand=4304)
22910#[inline]
22911#[target_feature(enable = "avx512f,avx512vl")]
22912#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22913#[cfg_attr(test, assert_instr(vperm))] //should be vpermq
22914pub fn _mm256_permutexvar_epi64(idx: __m256i, a: __m256i) -> __m256i {
    unsafe { transmute(vpermq256(a.as_i64x4(), idx.as_i64x4())) }
22916}
22917
22918/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22919///
22920/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_epi64&expand=4302)
22921#[inline]
22922#[target_feature(enable = "avx512f,avx512vl")]
22923#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22924#[cfg_attr(test, assert_instr(vpermq))]
22925pub fn _mm256_mask_permutexvar_epi64(
22926 src: __m256i,
22927 k: __mmask8,
22928 idx: __m256i,
22929 a: __m256i,
22930) -> __m256i {
22931 unsafe {
22932 let permute: i64x4 = _mm256_permutexvar_epi64(idx, a).as_i64x4();
        transmute(simd_select_bitmask(k, permute, src.as_i64x4()))
22934 }
22935}
22936
22937/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22938///
22939/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_epi64&expand=4303)
22940#[inline]
22941#[target_feature(enable = "avx512f,avx512vl")]
22942#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22943#[cfg_attr(test, assert_instr(vpermq))]
22944pub fn _mm256_maskz_permutexvar_epi64(k: __mmask8, idx: __m256i, a: __m256i) -> __m256i {
22945 unsafe {
22946 let permute: i64x4 = _mm256_permutexvar_epi64(idx, a).as_i64x4();
        transmute(simd_select_bitmask(k, permute, i64x4::ZERO))
22948 }
22949}
22950
22951/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx.
22952///
22953/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_ps&expand=4200)
22954#[inline]
22955#[target_feature(enable = "avx512f")]
22956#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22957#[cfg_attr(test, assert_instr(vpermps))]
22958pub fn _mm512_permutexvar_ps(idx: __m512i, a: __m512) -> __m512 {
    unsafe { transmute(vpermps(a.as_f32x16(), idx.as_i32x16())) }
22960}
22961
22962/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22963///
22964/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_ps&expand=4326)
22965#[inline]
22966#[target_feature(enable = "avx512f")]
22967#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22968#[cfg_attr(test, assert_instr(vpermps))]
22969pub fn _mm512_mask_permutexvar_ps(src: __m512, k: __mmask16, idx: __m512i, a: __m512) -> __m512 {
22970 unsafe {
22971 let permute: f32x16 = _mm512_permutexvar_ps(idx, a).as_f32x16();
        transmute(simd_select_bitmask(k, permute, src.as_f32x16()))
22973 }
22974}
22975
22976/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22977///
22978/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_ps&expand=4327)
22979#[inline]
22980#[target_feature(enable = "avx512f")]
22981#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22982#[cfg_attr(test, assert_instr(vpermps))]
22983pub fn _mm512_maskz_permutexvar_ps(k: __mmask16, idx: __m512i, a: __m512) -> __m512 {
22984 unsafe {
22985 let permute: f32x16 = _mm512_permutexvar_ps(idx, a).as_f32x16();
        transmute(simd_select_bitmask(k, permute, f32x16::ZERO))
22987 }
22988}
22989
22990/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx.
22991///
22992/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_ps&expand=4325)
22993#[inline]
22994#[target_feature(enable = "avx512f,avx512vl")]
22995#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22996#[cfg_attr(test, assert_instr(vpermps))]
22997pub fn _mm256_permutexvar_ps(idx: __m256i, a: __m256) -> __m256 {
22998 _mm256_permutevar8x32_ps(a, idx) //llvm.x86.avx2.permps
22999}
23000
23001/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23002///
23003/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_ps&expand=4323)
23004#[inline]
23005#[target_feature(enable = "avx512f,avx512vl")]
23006#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23007#[cfg_attr(test, assert_instr(vpermps))]
23008pub fn _mm256_mask_permutexvar_ps(src: __m256, k: __mmask8, idx: __m256i, a: __m256) -> __m256 {
23009 unsafe {
23010 let permute: f32x8 = _mm256_permutexvar_ps(idx, a).as_f32x8();
        transmute(simd_select_bitmask(k, permute, src.as_f32x8()))
23012 }
23013}
23014
23015/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23016///
23017/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_ps&expand=4324)
23018#[inline]
23019#[target_feature(enable = "avx512f,avx512vl")]
23020#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23021#[cfg_attr(test, assert_instr(vpermps))]
23022pub fn _mm256_maskz_permutexvar_ps(k: __mmask8, idx: __m256i, a: __m256) -> __m256 {
23023 unsafe {
23024 let permute: f32x8 = _mm256_permutexvar_ps(idx, a).as_f32x8();
        transmute(simd_select_bitmask(k, permute, f32x8::ZERO))
23026 }
23027}
23028
23029/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst.
23030///
23031/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_pd&expand=4322)
23032#[inline]
23033#[target_feature(enable = "avx512f")]
23034#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23035#[cfg_attr(test, assert_instr(vpermpd))]
23036pub fn _mm512_permutexvar_pd(idx: __m512i, a: __m512d) -> __m512d {
    unsafe { transmute(vpermpd(a.as_f64x8(), idx.as_i64x8())) }
23038}
23039
23040/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23041///
23042/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_pd&expand=4320)
23043#[inline]
23044#[target_feature(enable = "avx512f")]
23045#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23046#[cfg_attr(test, assert_instr(vpermpd))]
23047pub fn _mm512_mask_permutexvar_pd(src: __m512d, k: __mmask8, idx: __m512i, a: __m512d) -> __m512d {
23048 unsafe {
23049 let permute: f64x8 = _mm512_permutexvar_pd(idx, a).as_f64x8();
        transmute(simd_select_bitmask(k, permute, src.as_f64x8()))
23051 }
23052}
23053
23054/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23055///
23056/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_pd&expand=4321)
23057#[inline]
23058#[target_feature(enable = "avx512f")]
23059#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23060#[cfg_attr(test, assert_instr(vpermpd))]
23061pub fn _mm512_maskz_permutexvar_pd(k: __mmask8, idx: __m512i, a: __m512d) -> __m512d {
23062 unsafe {
23063 let permute: f64x8 = _mm512_permutexvar_pd(idx, a).as_f64x8();
        transmute(simd_select_bitmask(k, permute, f64x8::ZERO))
23065 }
23066}
23067
23068/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst.
23069///
23070/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_pd&expand=4319)
23071#[inline]
23072#[target_feature(enable = "avx512f,avx512vl")]
23073#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23074#[cfg_attr(test, assert_instr(vpermpd))]
23075pub fn _mm256_permutexvar_pd(idx: __m256i, a: __m256d) -> __m256d {
    unsafe { transmute(vpermpd256(a.as_f64x4(), idx.as_i64x4())) }
23077}
23078
23079/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23080///
23081/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_pd&expand=4317)
23082#[inline]
23083#[target_feature(enable = "avx512f,avx512vl")]
23084#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23085#[cfg_attr(test, assert_instr(vpermpd))]
23086pub fn _mm256_mask_permutexvar_pd(src: __m256d, k: __mmask8, idx: __m256i, a: __m256d) -> __m256d {
23087 unsafe {
23088 let permute: f64x4 = _mm256_permutexvar_pd(idx, a).as_f64x4();
        transmute(simd_select_bitmask(k, permute, src.as_f64x4()))
23090 }
23091}
23092
23093/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23094///
23095/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_pd&expand=4318)
23096#[inline]
23097#[target_feature(enable = "avx512f,avx512vl")]
23098#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23099#[cfg_attr(test, assert_instr(vpermpd))]
23100pub fn _mm256_maskz_permutexvar_pd(k: __mmask8, idx: __m256i, a: __m256d) -> __m256d {
23101 unsafe {
23102 let permute: f64x4 = _mm256_permutexvar_pd(idx, a).as_f64x4();
        transmute(simd_select_bitmask(k, permute, f64x4::ZERO))
23104 }
23105}
23106
23107/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23108///
23109/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_epi32&expand=4238)
23110#[inline]
23111#[target_feature(enable = "avx512f")]
23112#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23113#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
23114pub fn _mm512_permutex2var_epi32(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vpermi2d(a.as_i32x16(), idx.as_i32x16(), b.as_i32x16())) }
23116}
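
// Two-source permute: bit 4 of each index element selects the source (0 picks from
// `a`, 1 picks from `b`) and the low 4 bits index into that source. Illustrative
// sketch of selecting alternately from `a` and `b`:
//
//     let a = _mm512_set1_epi32(1);
//     let b = _mm512_set1_epi32(2);
//     let idx = _mm512_setr_epi32(0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23);
//     let r = _mm512_permutex2var_epi32(a, idx, b);
//     // lanes of r alternate between a and b: [1, 2, 1, 2, ...]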
23117
23118/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23119///
23120/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_epi32&expand=4235)
23121#[inline]
23122#[target_feature(enable = "avx512f")]
23123#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23124#[cfg_attr(test, assert_instr(vpermt2d))]
23125pub fn _mm512_mask_permutex2var_epi32(
23126 a: __m512i,
23127 k: __mmask16,
23128 idx: __m512i,
23129 b: __m512i,
23130) -> __m512i {
23131 unsafe {
23132 let permute: i32x16 = _mm512_permutex2var_epi32(a, idx, b).as_i32x16();
        transmute(simd_select_bitmask(k, permute, a.as_i32x16()))
23134 }
23135}
23136
23137/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23138///
23139/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_epi32&expand=4237)
23140#[inline]
23141#[target_feature(enable = "avx512f")]
23142#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23143#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
23144pub fn _mm512_maskz_permutex2var_epi32(
23145 k: __mmask16,
23146 a: __m512i,
23147 idx: __m512i,
23148 b: __m512i,
23149) -> __m512i {
23150 unsafe {
23151 let permute: i32x16 = _mm512_permutex2var_epi32(a, idx, b).as_i32x16();
        transmute(simd_select_bitmask(k, permute, i32x16::ZERO))
23153 }
23154}
23155
23156/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23157///
23158/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_epi32&expand=4236)
23159#[inline]
23160#[target_feature(enable = "avx512f")]
23161#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23162#[cfg_attr(test, assert_instr(vpermi2d))]
23163pub fn _mm512_mask2_permutex2var_epi32(
23164 a: __m512i,
23165 idx: __m512i,
23166 k: __mmask16,
23167 b: __m512i,
23168) -> __m512i {
23169 unsafe {
23170 let permute: i32x16 = _mm512_permutex2var_epi32(a, idx, b).as_i32x16();
        transmute(simd_select_bitmask(k, permute, idx.as_i32x16()))
23172 }
23173}
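
// The three masked forms above differ only in what fills a result lane whose mask bit
// is clear: `_mm512_mask_permutex2var_epi32` keeps the lane from `a`,
// `_mm512_maskz_permutex2var_epi32` zeroes it, and `_mm512_mask2_permutex2var_epi32`
// keeps the raw index value from `idx`. Illustrative sketch only:
//
//     let a = _mm512_set1_epi32(1);
//     let b = _mm512_set1_epi32(2);
//     let idx = _mm512_setr_epi32(0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23);
//     let r = _mm512_mask2_permutex2var_epi32(a, idx, 0b0000_0000_1111_1111, b);
//     // lanes 0..7 hold the permuted values; lanes 8..15 are copied from idx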
23174
23175/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23176///
23177/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_epi32&expand=4234)
23178#[inline]
23179#[target_feature(enable = "avx512f,avx512vl")]
23180#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23181#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
23182pub fn _mm256_permutex2var_epi32(a: __m256i, idx: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vpermi2d256(a.as_i32x8(), idx.as_i32x8(), b.as_i32x8())) }
23184}
23185
23186/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23187///
23188/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_epi32&expand=4231)
23189#[inline]
23190#[target_feature(enable = "avx512f,avx512vl")]
23191#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23192#[cfg_attr(test, assert_instr(vpermt2d))]
23193pub fn _mm256_mask_permutex2var_epi32(
23194 a: __m256i,
23195 k: __mmask8,
23196 idx: __m256i,
23197 b: __m256i,
23198) -> __m256i {
23199 unsafe {
23200 let permute: i32x8 = _mm256_permutex2var_epi32(a, idx, b).as_i32x8();
        transmute(simd_select_bitmask(k, permute, a.as_i32x8()))
23202 }
23203}
23204
23205/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23206///
23207/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_epi32&expand=4233)
23208#[inline]
23209#[target_feature(enable = "avx512f,avx512vl")]
23210#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23211#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
23212pub fn _mm256_maskz_permutex2var_epi32(
23213 k: __mmask8,
23214 a: __m256i,
23215 idx: __m256i,
23216 b: __m256i,
23217) -> __m256i {
23218 unsafe {
23219 let permute: i32x8 = _mm256_permutex2var_epi32(a, idx, b).as_i32x8();
        transmute(simd_select_bitmask(k, permute, i32x8::ZERO))
23221 }
23222}
23223
23224/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23225///
23226/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_epi32&expand=4232)
23227#[inline]
23228#[target_feature(enable = "avx512f,avx512vl")]
23229#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23230#[cfg_attr(test, assert_instr(vpermi2d))]
23231pub fn _mm256_mask2_permutex2var_epi32(
23232 a: __m256i,
23233 idx: __m256i,
23234 k: __mmask8,
23235 b: __m256i,
23236) -> __m256i {
23237 unsafe {
23238 let permute: i32x8 = _mm256_permutex2var_epi32(a, idx, b).as_i32x8();
        transmute(simd_select_bitmask(k, permute, idx.as_i32x8()))
23240 }
23241}
23242
23243/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23244///
23245/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_epi32&expand=4230)
23246#[inline]
23247#[target_feature(enable = "avx512f,avx512vl")]
23248#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23249#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
23250pub fn _mm_permutex2var_epi32(a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vpermi2d128(a.as_i32x4(), idx.as_i32x4(), b.as_i32x4())) }
23252}
23253
23254/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23255///
23256/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_epi32&expand=4227)
23257#[inline]
23258#[target_feature(enable = "avx512f,avx512vl")]
23259#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23260#[cfg_attr(test, assert_instr(vpermt2d))]
23261pub fn _mm_mask_permutex2var_epi32(a: __m128i, k: __mmask8, idx: __m128i, b: __m128i) -> __m128i {
23262 unsafe {
23263 let permute: i32x4 = _mm_permutex2var_epi32(a, idx, b).as_i32x4();
        transmute(simd_select_bitmask(k, permute, a.as_i32x4()))
23265 }
23266}
23267
23268/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23269///
23270/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_epi32&expand=4229)
23271#[inline]
23272#[target_feature(enable = "avx512f,avx512vl")]
23273#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23274#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
23275pub fn _mm_maskz_permutex2var_epi32(k: __mmask8, a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
23276 unsafe {
23277 let permute: i32x4 = _mm_permutex2var_epi32(a, idx, b).as_i32x4();
        transmute(simd_select_bitmask(k, permute, i32x4::ZERO))
23279 }
23280}
23281
23282/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23283///
23284/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_epi32&expand=4228)
23285#[inline]
23286#[target_feature(enable = "avx512f,avx512vl")]
23287#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23288#[cfg_attr(test, assert_instr(vpermi2d))]
23289pub fn _mm_mask2_permutex2var_epi32(a: __m128i, idx: __m128i, k: __mmask8, b: __m128i) -> __m128i {
23290 unsafe {
23291 let permute: i32x4 = _mm_permutex2var_epi32(a, idx, b).as_i32x4();
        transmute(simd_select_bitmask(k, permute, idx.as_i32x4()))
23293 }
23294}
23295
23296/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23297///
23298/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_epi64&expand=4250)
23299#[inline]
23300#[target_feature(enable = "avx512f")]
23301#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23302#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
23303pub fn _mm512_permutex2var_epi64(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vpermi2q(a.as_i64x8(), idx.as_i64x8(), b.as_i64x8())) }
23305}
23306
23307/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23308///
23309/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_epi64&expand=4247)
23310#[inline]
23311#[target_feature(enable = "avx512f")]
23312#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23313#[cfg_attr(test, assert_instr(vpermt2q))]
23314pub fn _mm512_mask_permutex2var_epi64(
23315 a: __m512i,
23316 k: __mmask8,
23317 idx: __m512i,
23318 b: __m512i,
23319) -> __m512i {
23320 unsafe {
23321 let permute: i64x8 = _mm512_permutex2var_epi64(a, idx, b).as_i64x8();
        transmute(simd_select_bitmask(k, permute, a.as_i64x8()))
23323 }
23324}
23325
23326/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23327///
23328/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_epi64&expand=4249)
23329#[inline]
23330#[target_feature(enable = "avx512f")]
23331#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23332#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
23333pub fn _mm512_maskz_permutex2var_epi64(
23334 k: __mmask8,
23335 a: __m512i,
23336 idx: __m512i,
23337 b: __m512i,
23338) -> __m512i {
23339 unsafe {
23340 let permute: i64x8 = _mm512_permutex2var_epi64(a, idx, b).as_i64x8();
        transmute(simd_select_bitmask(k, permute, i64x8::ZERO))
23342 }
23343}
23344
23345/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23346///
23347/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_epi64&expand=4248)
23348#[inline]
23349#[target_feature(enable = "avx512f")]
23350#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23351#[cfg_attr(test, assert_instr(vpermi2q))]
23352pub fn _mm512_mask2_permutex2var_epi64(
23353 a: __m512i,
23354 idx: __m512i,
23355 k: __mmask8,
23356 b: __m512i,
23357) -> __m512i {
23358 unsafe {
23359 let permute: i64x8 = _mm512_permutex2var_epi64(a, idx, b).as_i64x8();
        transmute(simd_select_bitmask(k, permute, idx.as_i64x8()))
23361 }
23362}
23363
23364/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23365///
23366/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_epi64&expand=4246)
23367#[inline]
23368#[target_feature(enable = "avx512f,avx512vl")]
23369#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23370#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
23371pub fn _mm256_permutex2var_epi64(a: __m256i, idx: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vpermi2q256(a.as_i64x4(), idx.as_i64x4(), b.as_i64x4())) }
23373}
23374
23375/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23376///
23377/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_epi64&expand=4243)
23378#[inline]
23379#[target_feature(enable = "avx512f,avx512vl")]
23380#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23381#[cfg_attr(test, assert_instr(vpermt2q))]
23382pub fn _mm256_mask_permutex2var_epi64(
23383 a: __m256i,
23384 k: __mmask8,
23385 idx: __m256i,
23386 b: __m256i,
23387) -> __m256i {
23388 unsafe {
23389 let permute: i64x4 = _mm256_permutex2var_epi64(a, idx, b).as_i64x4();
        transmute(simd_select_bitmask(k, permute, a.as_i64x4()))
23391 }
23392}
23393
23394/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23395///
23396/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_epi64&expand=4245)
23397#[inline]
23398#[target_feature(enable = "avx512f,avx512vl")]
23399#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23400#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
23401pub fn _mm256_maskz_permutex2var_epi64(
23402 k: __mmask8,
23403 a: __m256i,
23404 idx: __m256i,
23405 b: __m256i,
23406) -> __m256i {
23407 unsafe {
23408 let permute: i64x4 = _mm256_permutex2var_epi64(a, idx, b).as_i64x4();
        transmute(simd_select_bitmask(k, permute, i64x4::ZERO))
23410 }
23411}
23412
23413/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23414///
23415/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_epi64&expand=4244)
23416#[inline]
23417#[target_feature(enable = "avx512f,avx512vl")]
23418#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23419#[cfg_attr(test, assert_instr(vpermi2q))]
23420pub fn _mm256_mask2_permutex2var_epi64(
23421 a: __m256i,
23422 idx: __m256i,
23423 k: __mmask8,
23424 b: __m256i,
23425) -> __m256i {
23426 unsafe {
23427 let permute: i64x4 = _mm256_permutex2var_epi64(a, idx, b).as_i64x4();
        transmute(simd_select_bitmask(k, permute, idx.as_i64x4()))
23429 }
23430}
23431
23432/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23433///
23434/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_epi64&expand=4242)
23435#[inline]
23436#[target_feature(enable = "avx512f,avx512vl")]
23437#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23438#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
23439pub fn _mm_permutex2var_epi64(a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vpermi2q128(a.as_i64x2(), idx.as_i64x2(), b.as_i64x2())) }
23441}
23442
23443/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23444///
23445/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_epi64&expand=4239)
23446#[inline]
23447#[target_feature(enable = "avx512f,avx512vl")]
23448#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23449#[cfg_attr(test, assert_instr(vpermt2q))]
23450pub fn _mm_mask_permutex2var_epi64(a: __m128i, k: __mmask8, idx: __m128i, b: __m128i) -> __m128i {
23451 unsafe {
23452 let permute: i64x2 = _mm_permutex2var_epi64(a, idx, b).as_i64x2();
        transmute(simd_select_bitmask(k, permute, a.as_i64x2()))
23454 }
23455}
23456
23457/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23458///
23459/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_epi64&expand=4241)
23460#[inline]
23461#[target_feature(enable = "avx512f,avx512vl")]
23462#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23463#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
23464pub fn _mm_maskz_permutex2var_epi64(k: __mmask8, a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
23465 unsafe {
23466 let permute: i64x2 = _mm_permutex2var_epi64(a, idx, b).as_i64x2();
        transmute(simd_select_bitmask(k, permute, i64x2::ZERO))
23468 }
23469}
23470
23471/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23472///
23473/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_epi64&expand=4240)
23474#[inline]
23475#[target_feature(enable = "avx512f,avx512vl")]
23476#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23477#[cfg_attr(test, assert_instr(vpermi2q))]
23478pub fn _mm_mask2_permutex2var_epi64(a: __m128i, idx: __m128i, k: __mmask8, b: __m128i) -> __m128i {
23479 unsafe {
23480 let permute: i64x2 = _mm_permutex2var_epi64(a, idx, b).as_i64x2();
        transmute(simd_select_bitmask(k, permute, idx.as_i64x2()))
23482 }
23483}
23484
23485/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23486///
23487/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_ps&expand=4286)
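///
/// Illustrative sketch (not part of Intel's description and not compiled as a doctest);
/// it assumes a CPU with AVX-512F and a toolchain with the unstable
/// `stdarch_x86_avx512` feature enabled:
///
/// ```ignore
/// let a = _mm512_setr_ps(0., 1., 2., 3., 4., 5., 6., 7.,
///                        8., 9., 10., 11., 12., 13., 14., 15.);
/// let b = _mm512_setr_ps(16., 17., 18., 19., 20., 21., 22., 23.,
///                        24., 25., 26., 27., 28., 29., 30., 31.);
/// // Bits 3:0 of each 32-bit index pick the element and bit 4 picks the source
/// // (`a` for 0, `b` for 1), so this interleaves the low halves of `a` and `b`.
/// let idx = _mm512_setr_epi32(0, 16, 1, 17, 2, 18, 3, 19,
///                             4, 20, 5, 21, 6, 22, 7, 23);
/// let r = _mm512_permutex2var_ps(a, idx, b);
/// // `r` now holds [0.0, 16.0, 1.0, 17.0, 2.0, 18.0, ..., 7.0, 23.0].
/// ```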
23488#[inline]
23489#[target_feature(enable = "avx512f")]
23490#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23491#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
23492pub fn _mm512_permutex2var_ps(a: __m512, idx: __m512i, b: __m512) -> __m512 {
    unsafe { transmute(vpermi2ps(a.as_f32x16(), idx.as_i32x16(), b.as_f32x16())) }
23494}
23495
23496/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23497///
23498/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_ps&expand=4283)
23499#[inline]
23500#[target_feature(enable = "avx512f")]
23501#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23502#[cfg_attr(test, assert_instr(vpermt2ps))]
23503pub fn _mm512_mask_permutex2var_ps(a: __m512, k: __mmask16, idx: __m512i, b: __m512) -> __m512 {
23504 unsafe {
23505 let permute: f32x16 = _mm512_permutex2var_ps(a, idx, b).as_f32x16();
        transmute(simd_select_bitmask(k, permute, a.as_f32x16()))
23507 }
23508}
23509
23510/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23511///
23512/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_ps&expand=4285)
23513#[inline]
23514#[target_feature(enable = "avx512f")]
23515#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23516#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
23517pub fn _mm512_maskz_permutex2var_ps(k: __mmask16, a: __m512, idx: __m512i, b: __m512) -> __m512 {
23518 unsafe {
23519 let permute: f32x16 = _mm512_permutex2var_ps(a, idx, b).as_f32x16();
        transmute(simd_select_bitmask(k, permute, f32x16::ZERO))
23521 }
23522}
23523
23524/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23525///
23526/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_ps&expand=4284)
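///
/// Illustrative sketch (not part of Intel's description and not compiled as a
/// doctest; same feature assumptions as `_mm512_permutex2var_ps`): lanes whose
/// mask bit is clear receive the bit pattern of the corresponding `idx` element
/// reinterpreted as an `f32`, matching the cast performed in the body below.
///
/// ```ignore
/// let a = _mm512_set1_ps(1.0);
/// let b = _mm512_set1_ps(2.0);
/// let idx = _mm512_set1_epi32(16); // every lane would select b[0]
/// // With an all-zero mask nothing is taken from the permutation; every lane
/// // gets the bits of `idx` (the integer 16) reinterpreted as an f32.
/// let r = _mm512_mask2_permutex2var_ps(a, idx, 0, b);
/// ```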
23527#[inline]
23528#[target_feature(enable = "avx512f")]
23529#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23530#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2ps, but it shows vpermt2ps
23531pub fn _mm512_mask2_permutex2var_ps(a: __m512, idx: __m512i, k: __mmask16, b: __m512) -> __m512 {
23532 unsafe {
23533 let permute: f32x16 = _mm512_permutex2var_ps(a, idx, b).as_f32x16();
23534 let idx: f32x16 = _mm512_castsi512_ps(idx).as_f32x16();
        transmute(simd_select_bitmask(k, permute, idx))
23536 }
23537}
23538
23539/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23540///
23541/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_ps&expand=4282)
23542#[inline]
23543#[target_feature(enable = "avx512f,avx512vl")]
23544#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23545#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
23546pub fn _mm256_permutex2var_ps(a: __m256, idx: __m256i, b: __m256) -> __m256 {
    unsafe { transmute(vpermi2ps256(a.as_f32x8(), idx.as_i32x8(), b.as_f32x8())) }
23548}
23549
23550/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23551///
23552/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_ps&expand=4279)
23553#[inline]
23554#[target_feature(enable = "avx512f,avx512vl")]
23555#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23556#[cfg_attr(test, assert_instr(vpermt2ps))]
23557pub fn _mm256_mask_permutex2var_ps(a: __m256, k: __mmask8, idx: __m256i, b: __m256) -> __m256 {
23558 unsafe {
23559 let permute: f32x8 = _mm256_permutex2var_ps(a, idx, b).as_f32x8();
        transmute(simd_select_bitmask(k, permute, a.as_f32x8()))
23561 }
23562}
23563
23564/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23565///
23566/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_ps&expand=4281)
23567#[inline]
23568#[target_feature(enable = "avx512f,avx512vl")]
23569#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23570#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
23571pub fn _mm256_maskz_permutex2var_ps(k: __mmask8, a: __m256, idx: __m256i, b: __m256) -> __m256 {
23572 unsafe {
23573 let permute: f32x8 = _mm256_permutex2var_ps(a, idx, b).as_f32x8();
        transmute(simd_select_bitmask(k, permute, f32x8::ZERO))
23575 }
23576}
23577
23578/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23579///
23580/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_ps&expand=4280)
23581#[inline]
23582#[target_feature(enable = "avx512f,avx512vl")]
23583#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23584#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2ps, but it shows vpermt2ps
23585pub fn _mm256_mask2_permutex2var_ps(a: __m256, idx: __m256i, k: __mmask8, b: __m256) -> __m256 {
23586 unsafe {
23587 let permute: f32x8 = _mm256_permutex2var_ps(a, idx, b).as_f32x8();
23588 let idx: f32x8 = _mm256_castsi256_ps(idx).as_f32x8();
        transmute(simd_select_bitmask(k, permute, idx))
23590 }
23591}
23592
23593/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23594///
23595/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_ps&expand=4278)
23596#[inline]
23597#[target_feature(enable = "avx512f,avx512vl")]
23598#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23599#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
23600pub fn _mm_permutex2var_ps(a: __m128, idx: __m128i, b: __m128) -> __m128 {
    unsafe { transmute(vpermi2ps128(a.as_f32x4(), idx.as_i32x4(), b.as_f32x4())) }
23602}
23603
23604/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23605///
23606/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_ps&expand=4275)
23607#[inline]
23608#[target_feature(enable = "avx512f,avx512vl")]
23609#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23610#[cfg_attr(test, assert_instr(vpermt2ps))]
23611pub fn _mm_mask_permutex2var_ps(a: __m128, k: __mmask8, idx: __m128i, b: __m128) -> __m128 {
23612 unsafe {
23613 let permute: f32x4 = _mm_permutex2var_ps(a, idx, b).as_f32x4();
        transmute(simd_select_bitmask(k, permute, a.as_f32x4()))
23615 }
23616}
23617
23618/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23619///
23620/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_ps&expand=4277)
23621#[inline]
23622#[target_feature(enable = "avx512f,avx512vl")]
23623#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23624#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
23625pub fn _mm_maskz_permutex2var_ps(k: __mmask8, a: __m128, idx: __m128i, b: __m128) -> __m128 {
23626 unsafe {
23627 let permute: f32x4 = _mm_permutex2var_ps(a, idx, b).as_f32x4();
        transmute(simd_select_bitmask(k, permute, f32x4::ZERO))
23629 }
23630}
23631
23632/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23633///
23634/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_ps&expand=4276)
23635#[inline]
23636#[target_feature(enable = "avx512f,avx512vl")]
23637#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23638#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2ps, but it shows vpermt2ps
23639pub fn _mm_mask2_permutex2var_ps(a: __m128, idx: __m128i, k: __mmask8, b: __m128) -> __m128 {
23640 unsafe {
23641 let permute: f32x4 = _mm_permutex2var_ps(a, idx, b).as_f32x4();
23642 let idx: f32x4 = _mm_castsi128_ps(idx).as_f32x4();
        transmute(simd_select_bitmask(k, permute, idx))
23644 }
23645}
23646
23647/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23648///
23649/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_pd&expand=4274)
23650#[inline]
23651#[target_feature(enable = "avx512f")]
23652#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23653#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
23654pub fn _mm512_permutex2var_pd(a: __m512d, idx: __m512i, b: __m512d) -> __m512d {
    unsafe { transmute(vpermi2pd(a.as_f64x8(), idx.as_i64x8(), b.as_f64x8())) }
23656}
23657
23658/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23659///
23660/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_pd&expand=4271)
23661#[inline]
23662#[target_feature(enable = "avx512f")]
23663#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23664#[cfg_attr(test, assert_instr(vpermt2pd))]
23665pub fn _mm512_mask_permutex2var_pd(a: __m512d, k: __mmask8, idx: __m512i, b: __m512d) -> __m512d {
23666 unsafe {
23667 let permute: f64x8 = _mm512_permutex2var_pd(a, idx, b).as_f64x8();
        transmute(simd_select_bitmask(k, permute, a.as_f64x8()))
23669 }
23670}
23671
23672/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23673///
23674/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_pd&expand=4273)
23675#[inline]
23676#[target_feature(enable = "avx512f")]
23677#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23678#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
23679pub fn _mm512_maskz_permutex2var_pd(k: __mmask8, a: __m512d, idx: __m512i, b: __m512d) -> __m512d {
23680 unsafe {
23681 let permute: f64x8 = _mm512_permutex2var_pd(a, idx, b).as_f64x8();
        transmute(simd_select_bitmask(k, permute, f64x8::ZERO))
23683 }
23684}
23685
/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23687///
23688/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_pd&expand=4272)
23689#[inline]
23690#[target_feature(enable = "avx512f")]
23691#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23692#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2pd, but it shows vpermt2pd
23693pub fn _mm512_mask2_permutex2var_pd(a: __m512d, idx: __m512i, k: __mmask8, b: __m512d) -> __m512d {
23694 unsafe {
23695 let permute: f64x8 = _mm512_permutex2var_pd(a, idx, b).as_f64x8();
23696 let idx: f64x8 = _mm512_castsi512_pd(idx).as_f64x8();
        transmute(simd_select_bitmask(k, permute, idx))
23698 }
23699}
23700
23701/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23702///
23703/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_pd&expand=4270)
23704#[inline]
23705#[target_feature(enable = "avx512f,avx512vl")]
23706#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23707#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
23708pub fn _mm256_permutex2var_pd(a: __m256d, idx: __m256i, b: __m256d) -> __m256d {
    unsafe { transmute(vpermi2pd256(a.as_f64x4(), idx.as_i64x4(), b.as_f64x4())) }
23710}
23711
23712/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23713///
23714/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_pd&expand=4267)
23715#[inline]
23716#[target_feature(enable = "avx512f,avx512vl")]
23717#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23718#[cfg_attr(test, assert_instr(vpermt2pd))]
23719pub fn _mm256_mask_permutex2var_pd(a: __m256d, k: __mmask8, idx: __m256i, b: __m256d) -> __m256d {
23720 unsafe {
23721 let permute: f64x4 = _mm256_permutex2var_pd(a, idx, b).as_f64x4();
        transmute(simd_select_bitmask(k, permute, a.as_f64x4()))
23723 }
23724}
23725
23726/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23727///
23728/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_pd&expand=4269)
23729#[inline]
23730#[target_feature(enable = "avx512f,avx512vl")]
23731#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23732#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
23733pub fn _mm256_maskz_permutex2var_pd(k: __mmask8, a: __m256d, idx: __m256i, b: __m256d) -> __m256d {
23734 unsafe {
23735 let permute: f64x4 = _mm256_permutex2var_pd(a, idx, b).as_f64x4();
        transmute(simd_select_bitmask(k, permute, f64x4::ZERO))
23737 }
23738}
23739
/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23741///
23742/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_pd&expand=4268)
23743#[inline]
23744#[target_feature(enable = "avx512f,avx512vl")]
23745#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23746#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2pd, but it shows vpermt2pd
23747pub fn _mm256_mask2_permutex2var_pd(a: __m256d, idx: __m256i, k: __mmask8, b: __m256d) -> __m256d {
23748 unsafe {
23749 let permute: f64x4 = _mm256_permutex2var_pd(a, idx, b).as_f64x4();
23750 let idx: f64x4 = _mm256_castsi256_pd(idx).as_f64x4();
        transmute(simd_select_bitmask(k, permute, idx))
23752 }
23753}
23754
23755/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23756///
23757/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_pd&expand=4266)
23758#[inline]
23759#[target_feature(enable = "avx512f,avx512vl")]
23760#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23761#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
23762pub fn _mm_permutex2var_pd(a: __m128d, idx: __m128i, b: __m128d) -> __m128d {
    unsafe { transmute(vpermi2pd128(a.as_f64x2(), idx.as_i64x2(), b.as_f64x2())) }
23764}
23765
23766/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23767///
23768/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_pd&expand=4263)
23769#[inline]
23770#[target_feature(enable = "avx512f,avx512vl")]
23771#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23772#[cfg_attr(test, assert_instr(vpermt2pd))]
23773pub fn _mm_mask_permutex2var_pd(a: __m128d, k: __mmask8, idx: __m128i, b: __m128d) -> __m128d {
23774 unsafe {
23775 let permute: f64x2 = _mm_permutex2var_pd(a, idx, b).as_f64x2();
        transmute(simd_select_bitmask(k, permute, a.as_f64x2()))
23777 }
23778}
23779
23780/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23781///
23782/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_pd&expand=4265)
23783#[inline]
23784#[target_feature(enable = "avx512f,avx512vl")]
23785#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23786#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
23787pub fn _mm_maskz_permutex2var_pd(k: __mmask8, a: __m128d, idx: __m128i, b: __m128d) -> __m128d {
23788 unsafe {
23789 let permute: f64x2 = _mm_permutex2var_pd(a, idx, b).as_f64x2();
        transmute(simd_select_bitmask(k, permute, f64x2::ZERO))
23791 }
23792}
23793
/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23795///
23796/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_pd&expand=4264)
23797#[inline]
23798#[target_feature(enable = "avx512f,avx512vl")]
23799#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23800#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2pd, but it shows vpermt2pd
23801pub fn _mm_mask2_permutex2var_pd(a: __m128d, idx: __m128i, k: __mmask8, b: __m128d) -> __m128d {
23802 unsafe {
23803 let permute: f64x2 = _mm_permutex2var_pd(a, idx, b).as_f64x2();
23804 let idx: f64x2 = _mm_castsi128_pd(idx).as_f64x2();
        transmute(simd_select_bitmask(k, permute, idx))
23806 }
23807}
23808
/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst.
23810///
23811/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_epi32&expand=5150)
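///
/// Illustrative sketch (not part of Intel's description and not compiled as a doctest);
/// it assumes a CPU with AVX-512F and a toolchain with the unstable
/// `stdarch_x86_avx512` feature enabled. The control can equivalently be written
/// with the `_MM_PERM_*` constants:
///
/// ```ignore
/// let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7,
///                           8, 9, 10, 11, 12, 13, 14, 15);
/// // Each 2-bit field of the control selects one element within every 128-bit
/// // lane; `0b00_01_10_11` reverses the four elements of each lane.
/// let r = _mm512_shuffle_epi32::<0b00_01_10_11>(a);
/// // `r` now holds [3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12].
/// ```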
23812#[inline]
23813#[target_feature(enable = "avx512f")]
23814#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23815#[cfg_attr(test, assert_instr(vshufps, MASK = 9))] //should be vpshufd
23816#[rustc_legacy_const_generics(1)]
23817pub fn _mm512_shuffle_epi32<const MASK: _MM_PERM_ENUM>(a: __m512i) -> __m512i {
23818 unsafe {
23819 static_assert_uimm_bits!(MASK, 8);
23820 let r: i32x16 = simd_shuffle!(
23821 a.as_i32x16(),
23822 a.as_i32x16(),
23823 [
23824 MASK as u32 & 0b11,
23825 (MASK as u32 >> 2) & 0b11,
23826 (MASK as u32 >> 4) & 0b11,
23827 (MASK as u32 >> 6) & 0b11,
23828 (MASK as u32 & 0b11) + 4,
23829 ((MASK as u32 >> 2) & 0b11) + 4,
23830 ((MASK as u32 >> 4) & 0b11) + 4,
23831 ((MASK as u32 >> 6) & 0b11) + 4,
23832 (MASK as u32 & 0b11) + 8,
23833 ((MASK as u32 >> 2) & 0b11) + 8,
23834 ((MASK as u32 >> 4) & 0b11) + 8,
23835 ((MASK as u32 >> 6) & 0b11) + 8,
23836 (MASK as u32 & 0b11) + 12,
23837 ((MASK as u32 >> 2) & 0b11) + 12,
23838 ((MASK as u32 >> 4) & 0b11) + 12,
23839 ((MASK as u32 >> 6) & 0b11) + 12,
23840 ],
23841 );
23842 transmute(r)
23843 }
23844}
23845
23846/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23847///
23848/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_epi32&expand=5148)
23849#[inline]
23850#[target_feature(enable = "avx512f")]
23851#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23852#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
23853#[rustc_legacy_const_generics(3)]
23854pub fn _mm512_mask_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
23855 src: __m512i,
23856 k: __mmask16,
23857 a: __m512i,
23858) -> __m512i {
23859 unsafe {
23860 static_assert_uimm_bits!(MASK, 8);
23861 let r: __m512i = _mm512_shuffle_epi32::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16()))
23863 }
23864}
23865
23866/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23867///
23868/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_epi32&expand=5149)
23869#[inline]
23870#[target_feature(enable = "avx512f")]
23871#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23872#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
23873#[rustc_legacy_const_generics(2)]
23874pub fn _mm512_maskz_shuffle_epi32<const MASK: _MM_PERM_ENUM>(k: __mmask16, a: __m512i) -> __m512i {
23875 unsafe {
23876 static_assert_uimm_bits!(MASK, 8);
23877 let r: __m512i = _mm512_shuffle_epi32::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO))
23879 }
23880}
23881
23882/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23883///
23884/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_epi32&expand=5145)
23885#[inline]
23886#[target_feature(enable = "avx512f,avx512vl")]
23887#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23888#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
23889#[rustc_legacy_const_generics(3)]
23890pub fn _mm256_mask_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
23891 src: __m256i,
23892 k: __mmask8,
23893 a: __m256i,
23894) -> __m256i {
23895 unsafe {
23896 static_assert_uimm_bits!(MASK, 8);
23897 let r: __m256i = _mm256_shuffle_epi32::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8()))
23899 }
23900}
23901
23902/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23903///
23904/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_epi32&expand=5146)
23905#[inline]
23906#[target_feature(enable = "avx512f,avx512vl")]
23907#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23908#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
23909#[rustc_legacy_const_generics(2)]
23910pub fn _mm256_maskz_shuffle_epi32<const MASK: _MM_PERM_ENUM>(k: __mmask8, a: __m256i) -> __m256i {
23911 unsafe {
23912 static_assert_uimm_bits!(MASK, 8);
23913 let r: __m256i = _mm256_shuffle_epi32::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO))
23915 }
23916}
23917
23918/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23919///
23920/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shuffle_epi32&expand=5142)
23921#[inline]
23922#[target_feature(enable = "avx512f,avx512vl")]
23923#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23924#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
23925#[rustc_legacy_const_generics(3)]
23926pub fn _mm_mask_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
23927 src: __m128i,
23928 k: __mmask8,
23929 a: __m128i,
23930) -> __m128i {
23931 unsafe {
23932 static_assert_uimm_bits!(MASK, 8);
23933 let r: __m128i = _mm_shuffle_epi32::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4()))
23935 }
23936}
23937
23938/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23939///
23940/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shuffle_epi32&expand=5143)
23941#[inline]
23942#[target_feature(enable = "avx512f,avx512vl")]
23943#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23944#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
23945#[rustc_legacy_const_generics(2)]
23946pub fn _mm_maskz_shuffle_epi32<const MASK: _MM_PERM_ENUM>(k: __mmask8, a: __m128i) -> __m128i {
23947 unsafe {
23948 static_assert_uimm_bits!(MASK, 8);
23949 let r: __m128i = _mm_shuffle_epi32::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO))
23951 }
23952}
23953
23954/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst.
23955///
23956/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_ps&expand=5203)
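///
/// Illustrative sketch (not part of Intel's description and not compiled as a doctest);
/// it assumes a CPU with AVX-512F and a toolchain with the unstable
/// `stdarch_x86_avx512` feature enabled:
///
/// ```ignore
/// let a = _mm512_setr_ps(0., 1., 2., 3., 4., 5., 6., 7.,
///                        8., 9., 10., 11., 12., 13., 14., 15.);
/// let b = _mm512_setr_ps(16., 17., 18., 19., 20., 21., 22., 23.,
///                        24., 25., 26., 27., 28., 29., 30., 31.);
/// // Within each 128-bit lane the two low results come from `a` and the two
/// // high results from `b`, each picked by a 2-bit field of the control.
/// let r = _mm512_shuffle_ps::<0b01_00_11_10>(a, b);
/// // Lane 0 of `r` is [2.0, 3.0, 16.0, 17.0]; the other lanes repeat the pattern
/// // with their own lane offsets.
/// ```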
23957#[inline]
23958#[target_feature(enable = "avx512f")]
23959#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23960#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
23961#[rustc_legacy_const_generics(2)]
23962pub fn _mm512_shuffle_ps<const MASK: i32>(a: __m512, b: __m512) -> __m512 {
23963 unsafe {
23964 static_assert_uimm_bits!(MASK, 8);
23965 simd_shuffle!(
23966 a,
23967 b,
23968 [
23969 MASK as u32 & 0b11,
23970 (MASK as u32 >> 2) & 0b11,
23971 ((MASK as u32 >> 4) & 0b11) + 16,
23972 ((MASK as u32 >> 6) & 0b11) + 16,
23973 (MASK as u32 & 0b11) + 4,
23974 ((MASK as u32 >> 2) & 0b11) + 4,
23975 ((MASK as u32 >> 4) & 0b11) + 20,
23976 ((MASK as u32 >> 6) & 0b11) + 20,
23977 (MASK as u32 & 0b11) + 8,
23978 ((MASK as u32 >> 2) & 0b11) + 8,
23979 ((MASK as u32 >> 4) & 0b11) + 24,
23980 ((MASK as u32 >> 6) & 0b11) + 24,
23981 (MASK as u32 & 0b11) + 12,
23982 ((MASK as u32 >> 2) & 0b11) + 12,
23983 ((MASK as u32 >> 4) & 0b11) + 28,
23984 ((MASK as u32 >> 6) & 0b11) + 28,
23985 ],
23986 )
23987 }
23988}
23989
23990/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23991///
23992/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_ps&expand=5201)
23993#[inline]
23994#[target_feature(enable = "avx512f")]
23995#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23996#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
23997#[rustc_legacy_const_generics(4)]
23998pub fn _mm512_mask_shuffle_ps<const MASK: i32>(
23999 src: __m512,
24000 k: __mmask16,
24001 a: __m512,
24002 b: __m512,
24003) -> __m512 {
24004 unsafe {
24005 static_assert_uimm_bits!(MASK, 8);
24006 let r: __m512 = _mm512_shuffle_ps::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16()))
24008 }
24009}
24010
24011/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24012///
24013/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_ps&expand=5202)
24014#[inline]
24015#[target_feature(enable = "avx512f")]
24016#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24017#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24018#[rustc_legacy_const_generics(3)]
24019pub fn _mm512_maskz_shuffle_ps<const MASK: i32>(k: __mmask16, a: __m512, b: __m512) -> __m512 {
24020 unsafe {
24021 static_assert_uimm_bits!(MASK, 8);
24022 let r: __m512 = _mm512_shuffle_ps::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO))
24024 }
24025}
24026
24027/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24028///
24029/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_ps&expand=5198)
24030#[inline]
24031#[target_feature(enable = "avx512f,avx512vl")]
24032#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24033#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24034#[rustc_legacy_const_generics(4)]
24035pub fn _mm256_mask_shuffle_ps<const MASK: i32>(
24036 src: __m256,
24037 k: __mmask8,
24038 a: __m256,
24039 b: __m256,
24040) -> __m256 {
24041 unsafe {
24042 static_assert_uimm_bits!(MASK, 8);
24043 let r: __m256 = _mm256_shuffle_ps::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8()))
24045 }
24046}
24047
24048/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24049///
24050/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_ps&expand=5199)
24051#[inline]
24052#[target_feature(enable = "avx512f,avx512vl")]
24053#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24054#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24055#[rustc_legacy_const_generics(3)]
24056pub fn _mm256_maskz_shuffle_ps<const MASK: i32>(k: __mmask8, a: __m256, b: __m256) -> __m256 {
24057 unsafe {
24058 static_assert_uimm_bits!(MASK, 8);
24059 let r: __m256 = _mm256_shuffle_ps::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO))
24061 }
24062}
24063
24064/// Shuffle single-precision (32-bit) floating-point elements in a using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24065///
24066/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shuffle_ps&expand=5195)
24067#[inline]
24068#[target_feature(enable = "avx512f,avx512vl")]
24069#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24070#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24071#[rustc_legacy_const_generics(4)]
24072pub fn _mm_mask_shuffle_ps<const MASK: i32>(
24073 src: __m128,
24074 k: __mmask8,
24075 a: __m128,
24076 b: __m128,
24077) -> __m128 {
24078 unsafe {
24079 static_assert_uimm_bits!(MASK, 8);
24080 let r: __m128 = _mm_shuffle_ps::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4()))
24082 }
24083}
24084
24085/// Shuffle single-precision (32-bit) floating-point elements in a using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24086///
24087/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shuffle_ps&expand=5196)
24088#[inline]
24089#[target_feature(enable = "avx512f,avx512vl")]
24090#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24091#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24092#[rustc_legacy_const_generics(3)]
24093pub fn _mm_maskz_shuffle_ps<const MASK: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
24094 unsafe {
24095 static_assert_uimm_bits!(MASK, 8);
24096 let r: __m128 = _mm_shuffle_ps::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO))
24098 }
24099}
24100
24101/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst.
24102///
24103/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_pd&expand=5192)
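///
/// Illustrative sketch (not part of Intel's description and not compiled as a doctest);
/// it assumes a CPU with AVX-512F and a toolchain with the unstable
/// `stdarch_x86_avx512` feature enabled:
///
/// ```ignore
/// let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
/// let b = _mm512_setr_pd(10., 11., 12., 13., 14., 15., 16., 17.);
/// // Within each 128-bit lane one control bit picks the low or high element of
/// // `a` for the even result and the next bit does the same for `b` for the odd
/// // result; `0b0101_0101` takes the high element of `a` and the low element of `b`.
/// let r = _mm512_shuffle_pd::<0b0101_0101>(a, b);
/// // `r` now holds [1.0, 10.0, 3.0, 12.0, 5.0, 14.0, 7.0, 16.0].
/// ```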
24104#[inline]
24105#[target_feature(enable = "avx512f")]
24106#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24107#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
24108#[rustc_legacy_const_generics(2)]
24109pub fn _mm512_shuffle_pd<const MASK: i32>(a: __m512d, b: __m512d) -> __m512d {
24110 unsafe {
24111 static_assert_uimm_bits!(MASK, 8);
24112 simd_shuffle!(
24113 a,
24114 b,
24115 [
24116 MASK as u32 & 0b1,
24117 ((MASK as u32 >> 1) & 0b1) + 8,
24118 ((MASK as u32 >> 2) & 0b1) + 2,
24119 ((MASK as u32 >> 3) & 0b1) + 10,
24120 ((MASK as u32 >> 4) & 0b1) + 4,
24121 ((MASK as u32 >> 5) & 0b1) + 12,
24122 ((MASK as u32 >> 6) & 0b1) + 6,
24123 ((MASK as u32 >> 7) & 0b1) + 14,
24124 ],
24125 )
24126 }
24127}
24128
24129/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24130///
24131/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_pd&expand=5190)
24132#[inline]
24133#[target_feature(enable = "avx512f")]
24134#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24135#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
24136#[rustc_legacy_const_generics(4)]
24137pub fn _mm512_mask_shuffle_pd<const MASK: i32>(
24138 src: __m512d,
24139 k: __mmask8,
24140 a: __m512d,
24141 b: __m512d,
24142) -> __m512d {
24143 unsafe {
24144 static_assert_uimm_bits!(MASK, 8);
24145 let r: __m512d = _mm512_shuffle_pd::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
24147 }
24148}
24149
24150/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24151///
24152/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_pd&expand=5191)
24153#[inline]
24154#[target_feature(enable = "avx512f")]
24155#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24156#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
24157#[rustc_legacy_const_generics(3)]
24158pub fn _mm512_maskz_shuffle_pd<const MASK: i32>(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
24159 unsafe {
24160 static_assert_uimm_bits!(MASK, 8);
24161 let r: __m512d = _mm512_shuffle_pd::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
24163 }
24164}
24165
24166/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24167///
24168/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_pd&expand=5187)
24169#[inline]
24170#[target_feature(enable = "avx512f,avx512vl")]
24171#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24172#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
24173#[rustc_legacy_const_generics(4)]
24174pub fn _mm256_mask_shuffle_pd<const MASK: i32>(
24175 src: __m256d,
24176 k: __mmask8,
24177 a: __m256d,
24178 b: __m256d,
24179) -> __m256d {
24180 unsafe {
24181 static_assert_uimm_bits!(MASK, 8);
24182 let r: __m256d = _mm256_shuffle_pd::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
24184 }
24185}
24186
24187/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24188///
24189/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_pd&expand=5188)
24190#[inline]
24191#[target_feature(enable = "avx512f,avx512vl")]
24192#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24193#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
24194#[rustc_legacy_const_generics(3)]
24195pub fn _mm256_maskz_shuffle_pd<const MASK: i32>(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
24196 unsafe {
24197 static_assert_uimm_bits!(MASK, 8);
24198 let r: __m256d = _mm256_shuffle_pd::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO))
24200 }
24201}
24202
24203/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24204///
24205/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shuffle_pd&expand=5184)
24206#[inline]
24207#[target_feature(enable = "avx512f,avx512vl")]
24208#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24209#[cfg_attr(test, assert_instr(vshufpd, MASK = 1))]
24210#[rustc_legacy_const_generics(4)]
24211pub fn _mm_mask_shuffle_pd<const MASK: i32>(
24212 src: __m128d,
24213 k: __mmask8,
24214 a: __m128d,
24215 b: __m128d,
24216) -> __m128d {
24217 unsafe {
24218 static_assert_uimm_bits!(MASK, 8);
24219 let r: __m128d = _mm_shuffle_pd::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x2(), src.as_f64x2()))
24221 }
24222}
24223
24224/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24225///
24226/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shuffle_pd&expand=5185)
24227#[inline]
24228#[target_feature(enable = "avx512f,avx512vl")]
24229#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24230#[cfg_attr(test, assert_instr(vshufpd, MASK = 1))]
24231#[rustc_legacy_const_generics(3)]
24232pub fn _mm_maskz_shuffle_pd<const MASK: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
24233 unsafe {
24234 static_assert_uimm_bits!(MASK, 8);
24235 let r: __m128d = _mm_shuffle_pd::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x2(), f64x2::ZERO))
24237 }
24238}
24239
24240/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst.
24241///
24242/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_i32x4&expand=5177)
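///
/// Illustrative sketch (not part of Intel's description and not compiled as a doctest);
/// it assumes a CPU with AVX-512F and a toolchain with the unstable
/// `stdarch_x86_avx512` feature enabled:
///
/// ```ignore
/// let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7,
///                           8, 9, 10, 11, 12, 13, 14, 15);
/// let b = _mm512_setr_epi32(16, 17, 18, 19, 20, 21, 22, 23,
///                           24, 25, 26, 27, 28, 29, 30, 31);
/// // Each 2-bit field selects one whole 128-bit lane: the two low result lanes
/// // come from `a`, the two high result lanes from `b`.
/// let r = _mm512_shuffle_i32x4::<0b01_00_11_10>(a, b);
/// // `r` now holds lanes 2 and 3 of `a` ([8..=15]) followed by lanes 0 and 1
/// // of `b` ([16..=23]).
/// ```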
24243#[inline]
24244#[target_feature(enable = "avx512f")]
24245#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24246#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_01_01_01))] //should be vshufi32x4
24247#[rustc_legacy_const_generics(2)]
24248pub fn _mm512_shuffle_i32x4<const MASK: i32>(a: __m512i, b: __m512i) -> __m512i {
24249 unsafe {
24250 static_assert_uimm_bits!(MASK, 8);
24251 let a = a.as_i32x16();
24252 let b = b.as_i32x16();
24253 let r: i32x16 = simd_shuffle!(
24254 a,
24255 b,
24256 [
24257 (MASK as u32 & 0b11) * 4 + 0,
24258 (MASK as u32 & 0b11) * 4 + 1,
24259 (MASK as u32 & 0b11) * 4 + 2,
24260 (MASK as u32 & 0b11) * 4 + 3,
24261 ((MASK as u32 >> 2) & 0b11) * 4 + 0,
24262 ((MASK as u32 >> 2) & 0b11) * 4 + 1,
24263 ((MASK as u32 >> 2) & 0b11) * 4 + 2,
24264 ((MASK as u32 >> 2) & 0b11) * 4 + 3,
24265 ((MASK as u32 >> 4) & 0b11) * 4 + 0 + 16,
24266 ((MASK as u32 >> 4) & 0b11) * 4 + 1 + 16,
24267 ((MASK as u32 >> 4) & 0b11) * 4 + 2 + 16,
24268 ((MASK as u32 >> 4) & 0b11) * 4 + 3 + 16,
24269 ((MASK as u32 >> 6) & 0b11) * 4 + 0 + 16,
24270 ((MASK as u32 >> 6) & 0b11) * 4 + 1 + 16,
24271 ((MASK as u32 >> 6) & 0b11) * 4 + 2 + 16,
24272 ((MASK as u32 >> 6) & 0b11) * 4 + 3 + 16,
24273 ],
24274 );
24275 transmute(r)
24276 }
24277}
24278
24279/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24280///
24281/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_i32x4&expand=5175)
24282#[inline]
24283#[target_feature(enable = "avx512f")]
24284#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24285#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b10_11_01_01))]
24286#[rustc_legacy_const_generics(4)]
24287pub fn _mm512_mask_shuffle_i32x4<const MASK: i32>(
24288 src: __m512i,
24289 k: __mmask16,
24290 a: __m512i,
24291 b: __m512i,
24292) -> __m512i {
24293 unsafe {
24294 static_assert_uimm_bits!(MASK, 8);
24295 let r: __m512i = _mm512_shuffle_i32x4::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16()))
24297 }
24298}
24299
24300/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24301///
24302/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_i32x4&expand=5176)
24303#[inline]
24304#[target_feature(enable = "avx512f")]
24305#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24306#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b10_11_01_01))]
24307#[rustc_legacy_const_generics(3)]
24308pub fn _mm512_maskz_shuffle_i32x4<const MASK: i32>(
24309 k: __mmask16,
24310 a: __m512i,
24311 b: __m512i,
24312) -> __m512i {
24313 unsafe {
24314 static_assert_uimm_bits!(MASK, 8);
24315 let r: __m512i = _mm512_shuffle_i32x4::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO))
24317 }
24318}
24319
24320/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst.
24321///
24322/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_i32x4&expand=5174)
24323#[inline]
24324#[target_feature(enable = "avx512f,avx512vl")]
24325#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24326#[cfg_attr(test, assert_instr(vperm, MASK = 0b11))] //should be vshufi32x4
24327#[rustc_legacy_const_generics(2)]
24328pub fn _mm256_shuffle_i32x4<const MASK: i32>(a: __m256i, b: __m256i) -> __m256i {
24329 unsafe {
24330 static_assert_uimm_bits!(MASK, 8);
24331 let a: i32x8 = a.as_i32x8();
24332 let b: i32x8 = b.as_i32x8();
24333 let r: i32x8 = simd_shuffle!(
24334 a,
24335 b,
24336 [
24337 (MASK as u32 & 0b1) * 4 + 0,
24338 (MASK as u32 & 0b1) * 4 + 1,
24339 (MASK as u32 & 0b1) * 4 + 2,
24340 (MASK as u32 & 0b1) * 4 + 3,
24341 ((MASK as u32 >> 1) & 0b1) * 4 + 0 + 8,
24342 ((MASK as u32 >> 1) & 0b1) * 4 + 1 + 8,
24343 ((MASK as u32 >> 1) & 0b1) * 4 + 2 + 8,
24344 ((MASK as u32 >> 1) & 0b1) * 4 + 3 + 8,
24345 ],
24346 );
        transmute(r)
24348 }
24349}
24350
24351/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24352///
24353/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_i32x4&expand=5172)
24354#[inline]
24355#[target_feature(enable = "avx512f,avx512vl")]
24356#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24357#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b11))]
24358#[rustc_legacy_const_generics(4)]
24359pub fn _mm256_mask_shuffle_i32x4<const MASK: i32>(
24360 src: __m256i,
24361 k: __mmask8,
24362 a: __m256i,
24363 b: __m256i,
24364) -> __m256i {
24365 unsafe {
24366 static_assert_uimm_bits!(MASK, 8);
24367 let r: __m256i = _mm256_shuffle_i32x4::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8()))
24369 }
24370}
24371
24372/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24373///
24374/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_i32x4&expand=5173)
24375#[inline]
24376#[target_feature(enable = "avx512f,avx512vl")]
24377#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24378#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b11))]
24379#[rustc_legacy_const_generics(3)]
24380pub fn _mm256_maskz_shuffle_i32x4<const MASK: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
24381 unsafe {
24382 static_assert_uimm_bits!(MASK, 8);
24383 let r: __m256i = _mm256_shuffle_i32x4::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO))
24385 }
24386}
24387
24388/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst.
24389///
24390/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_i64x2&expand=5183)
24391#[inline]
24392#[target_feature(enable = "avx512f")]
24393#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24394#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_11_11_11))]
24395#[rustc_legacy_const_generics(2)]
24396pub fn _mm512_shuffle_i64x2<const MASK: i32>(a: __m512i, b: __m512i) -> __m512i {
24397 unsafe {
24398 static_assert_uimm_bits!(MASK, 8);
24399 let a: i64x8 = a.as_i64x8();
24400 let b: i64x8 = b.as_i64x8();
24401 let r: i64x8 = simd_shuffle!(
24402 a,
24403 b,
24404 [
24405 (MASK as u32 & 0b11) * 2 + 0,
24406 (MASK as u32 & 0b11) * 2 + 1,
24407 ((MASK as u32 >> 2) & 0b11) * 2 + 0,
24408 ((MASK as u32 >> 2) & 0b11) * 2 + 1,
24409 ((MASK as u32 >> 4) & 0b11) * 2 + 0 + 8,
24410 ((MASK as u32 >> 4) & 0b11) * 2 + 1 + 8,
24411 ((MASK as u32 >> 6) & 0b11) * 2 + 0 + 8,
24412 ((MASK as u32 >> 6) & 0b11) * 2 + 1 + 8,
24413 ],
24414 );
        transmute(r)
24416 }
24417}
24418
24419/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24420///
24421/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_i64x2&expand=5181)
24422#[inline]
24423#[target_feature(enable = "avx512f")]
24424#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24425#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_11_11_11))]
24426#[rustc_legacy_const_generics(4)]
24427pub fn _mm512_mask_shuffle_i64x2<const MASK: i32>(
24428 src: __m512i,
24429 k: __mmask8,
24430 a: __m512i,
24431 b: __m512i,
24432) -> __m512i {
24433 unsafe {
24434 static_assert_uimm_bits!(MASK, 8);
24435 let r: __m512i = _mm512_shuffle_i64x2::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8()))
24437 }
24438}
24439
24440/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24441///
24442/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_i64x2&expand=5182)
24443#[inline]
24444#[target_feature(enable = "avx512f")]
24445#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24446#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_11_11_11))]
24447#[rustc_legacy_const_generics(3)]
24448pub fn _mm512_maskz_shuffle_i64x2<const MASK: i32>(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
24449 unsafe {
24450 static_assert_uimm_bits!(MASK, 8);
24451 let r: __m512i = _mm512_shuffle_i64x2::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO))
24453 }
24454}
24455
24456/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst.
24457///
24458/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_i64x2&expand=5180)
24459#[inline]
24460#[target_feature(enable = "avx512f,avx512vl")]
24461#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24462#[cfg_attr(test, assert_instr(vperm, MASK = 0b01))] //should be vshufi64x2
24463#[rustc_legacy_const_generics(2)]
24464pub fn _mm256_shuffle_i64x2<const MASK: i32>(a: __m256i, b: __m256i) -> __m256i {
24465 unsafe {
24466 static_assert_uimm_bits!(MASK, 8);
24467 let a: i64x4 = a.as_i64x4();
24468 let b: i64x4 = b.as_i64x4();
24469 let r: i64x4 = simd_shuffle!(
24470 a,
24471 b,
24472 [
24473 (MASK as u32 & 0b1) * 2 + 0,
24474 (MASK as u32 & 0b1) * 2 + 1,
24475 ((MASK as u32 >> 1) & 0b1) * 2 + 0 + 4,
24476 ((MASK as u32 >> 1) & 0b1) * 2 + 1 + 4,
24477 ],
24478 );
        transmute(r)
24480 }
24481}
24482
24483/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24484///
24485/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_i64x2&expand=5178)
24486#[inline]
24487#[target_feature(enable = "avx512f,avx512vl")]
24488#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24489#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b11))]
24490#[rustc_legacy_const_generics(4)]
24491pub fn _mm256_mask_shuffle_i64x2<const MASK: i32>(
24492 src: __m256i,
24493 k: __mmask8,
24494 a: __m256i,
24495 b: __m256i,
24496) -> __m256i {
24497 unsafe {
24498 static_assert_uimm_bits!(MASK, 8);
24499 let r: __m256i = _mm256_shuffle_i64x2::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4()))
24501 }
24502}
24503
24504/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24505///
24506/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_i64x2&expand=5179)
24507#[inline]
24508#[target_feature(enable = "avx512f,avx512vl")]
24509#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24510#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b11))]
24511#[rustc_legacy_const_generics(3)]
24512pub fn _mm256_maskz_shuffle_i64x2<const MASK: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
24513 unsafe {
24514 static_assert_uimm_bits!(MASK, 8);
24515 let r: __m256i = _mm256_shuffle_i64x2::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO))
24517 }
24518}
24519
24520/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
24521///
24522/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_f32x4&expand=5165)
24523#[inline]
24524#[target_feature(enable = "avx512f")]
24525#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b1011))] //should be vshuff32x4, but generates vshuff64x2
24527#[rustc_legacy_const_generics(2)]
24528pub fn _mm512_shuffle_f32x4<const MASK: i32>(a: __m512, b: __m512) -> __m512 {
24529 unsafe {
24530 static_assert_uimm_bits!(MASK, 8);
24531 let a = a.as_f32x16();
24532 let b = b.as_f32x16();
24533 let r: f32x16 = simd_shuffle!(
24534 a,
24535 b,
24536 [
24537 (MASK as u32 & 0b11) * 4 + 0,
24538 (MASK as u32 & 0b11) * 4 + 1,
24539 (MASK as u32 & 0b11) * 4 + 2,
24540 (MASK as u32 & 0b11) * 4 + 3,
24541 ((MASK as u32 >> 2) & 0b11) * 4 + 0,
24542 ((MASK as u32 >> 2) & 0b11) * 4 + 1,
24543 ((MASK as u32 >> 2) & 0b11) * 4 + 2,
24544 ((MASK as u32 >> 2) & 0b11) * 4 + 3,
24545 ((MASK as u32 >> 4) & 0b11) * 4 + 0 + 16,
24546 ((MASK as u32 >> 4) & 0b11) * 4 + 1 + 16,
24547 ((MASK as u32 >> 4) & 0b11) * 4 + 2 + 16,
24548 ((MASK as u32 >> 4) & 0b11) * 4 + 3 + 16,
24549 ((MASK as u32 >> 6) & 0b11) * 4 + 0 + 16,
24550 ((MASK as u32 >> 6) & 0b11) * 4 + 1 + 16,
24551 ((MASK as u32 >> 6) & 0b11) * 4 + 2 + 16,
24552 ((MASK as u32 >> 6) & 0b11) * 4 + 3 + 16,
24553 ],
24554 );
24555 transmute(r)
24556 }
24557}
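
// Illustrative sketch only (hypothetical helper, not part of the test suite):
// each 128-bit input lane below holds four consecutive values so the lane
// origin is easy to read off the result. The 2-bit MASK fields, from low to
// high, pick the `a` lane for result lanes 0 and 1 and the `b` lane for
// result lanes 2 and 3.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_shuffle_f32x4() -> __m512 {
    let a = _mm512_setr_ps(
        0., 1., 2., 3., 10., 11., 12., 13., 20., 21., 22., 23., 30., 31., 32., 33.,
    );
    let b = _mm512_setr_ps(
        100., 101., 102., 103., 110., 111., 112., 113., 120., 121., 122., 123., 130., 131., 132.,
        133.,
    );
    // MASK = 0b01_00_11_10 selects [a.lane2, a.lane3, b.lane0, b.lane1], i.e.
    // [20..=23, 30..=33, 100..=103, 110..=113].
    _mm512_shuffle_f32x4::<0b01_00_11_10>(a, b)
}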
24558
24559/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24560///
24561/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_f32x4&expand=5163)
24562#[inline]
24563#[target_feature(enable = "avx512f")]
24564#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24565#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b1011))]
24566#[rustc_legacy_const_generics(4)]
24567pub fn _mm512_mask_shuffle_f32x4<const MASK: i32>(
24568 src: __m512,
24569 k: __mmask16,
24570 a: __m512,
24571 b: __m512,
24572) -> __m512 {
24573 unsafe {
24574 static_assert_uimm_bits!(MASK, 8);
24575 let r: __m512 = _mm512_shuffle_f32x4::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16()))
24577 }
24578}
24579
24580/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24581///
24582/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_f32x4&expand=5164)
24583#[inline]
24584#[target_feature(enable = "avx512f")]
24585#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24586#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b1011))]
24587#[rustc_legacy_const_generics(3)]
24588pub fn _mm512_maskz_shuffle_f32x4<const MASK: i32>(k: __mmask16, a: __m512, b: __m512) -> __m512 {
24589 unsafe {
24590 static_assert_uimm_bits!(MASK, 8);
24591 let r: __m512 = _mm512_shuffle_f32x4::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO))
24593 }
24594}
24595
24596/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
24597///
24598/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_f32x4&expand=5162)
24599#[inline]
24600#[target_feature(enable = "avx512f,avx512vl")]
24601#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24602#[cfg_attr(test, assert_instr(vperm, MASK = 0b01))] //should be vshuff32x4
24603#[rustc_legacy_const_generics(2)]
24604pub fn _mm256_shuffle_f32x4<const MASK: i32>(a: __m256, b: __m256) -> __m256 {
24605 unsafe {
24606 static_assert_uimm_bits!(MASK, 8);
24607 let a: f32x8 = a.as_f32x8();
24608 let b: f32x8 = b.as_f32x8();
24609 let r: f32x8 = simd_shuffle!(
24610 a,
24611 b,
24612 [
24613 (MASK as u32 & 0b1) * 4 + 0,
24614 (MASK as u32 & 0b1) * 4 + 1,
24615 (MASK as u32 & 0b1) * 4 + 2,
24616 (MASK as u32 & 0b1) * 4 + 3,
24617 ((MASK as u32 >> 1) & 0b1) * 4 + 0 + 8,
24618 ((MASK as u32 >> 1) & 0b1) * 4 + 1 + 8,
24619 ((MASK as u32 >> 1) & 0b1) * 4 + 2 + 8,
24620 ((MASK as u32 >> 1) & 0b1) * 4 + 3 + 8,
24621 ],
24622 );
        transmute(r)
24624 }
24625}
24626
24627/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24628///
24629/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_f32x4&expand=5160)
24630#[inline]
24631#[target_feature(enable = "avx512f,avx512vl")]
24632#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24633#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b11))]
24634#[rustc_legacy_const_generics(4)]
24635pub fn _mm256_mask_shuffle_f32x4<const MASK: i32>(
24636 src: __m256,
24637 k: __mmask8,
24638 a: __m256,
24639 b: __m256,
24640) -> __m256 {
24641 unsafe {
24642 static_assert_uimm_bits!(MASK, 8);
24643 let r: __m256 = _mm256_shuffle_f32x4::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8()))
24645 }
24646}
24647
24648/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24649///
24650/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_f32x4&expand=5161)
24651#[inline]
24652#[target_feature(enable = "avx512f,avx512vl")]
24653#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24654#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b11))]
24655#[rustc_legacy_const_generics(3)]
24656pub fn _mm256_maskz_shuffle_f32x4<const MASK: i32>(k: __mmask8, a: __m256, b: __m256) -> __m256 {
24657 unsafe {
24658 static_assert_uimm_bits!(MASK, 8);
24659 let r: __m256 = _mm256_shuffle_f32x4::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO))
24661 }
24662}
24663
24664/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
24665///
24666/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_f64x2&expand=5171)
24667#[inline]
24668#[target_feature(enable = "avx512f")]
24669#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24670#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b10_11_11_11))]
24671#[rustc_legacy_const_generics(2)]
24672pub fn _mm512_shuffle_f64x2<const MASK: i32>(a: __m512d, b: __m512d) -> __m512d {
24673 unsafe {
24674 static_assert_uimm_bits!(MASK, 8);
24675 let a: f64x8 = a.as_f64x8();
24676 let b: f64x8 = b.as_f64x8();
24677 let r: f64x8 = simd_shuffle!(
24678 a,
24679 b,
24680 [
24681 (MASK as u32 & 0b11) * 2 + 0,
24682 (MASK as u32 & 0b11) * 2 + 1,
24683 ((MASK as u32 >> 2) & 0b11) * 2 + 0,
24684 ((MASK as u32 >> 2) & 0b11) * 2 + 1,
24685 ((MASK as u32 >> 4) & 0b11) * 2 + 0 + 8,
24686 ((MASK as u32 >> 4) & 0b11) * 2 + 1 + 8,
24687 ((MASK as u32 >> 6) & 0b11) * 2 + 0 + 8,
24688 ((MASK as u32 >> 6) & 0b11) * 2 + 1 + 8,
24689 ],
24690 );
        transmute(r)
24692 }
24693}
24694
24695/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24696///
24697/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_f64x2&expand=5169)
24698#[inline]
24699#[target_feature(enable = "avx512f")]
24700#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24701#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b10_11_11_11))]
24702#[rustc_legacy_const_generics(4)]
24703pub fn _mm512_mask_shuffle_f64x2<const MASK: i32>(
24704 src: __m512d,
24705 k: __mmask8,
24706 a: __m512d,
24707 b: __m512d,
24708) -> __m512d {
24709 unsafe {
24710 static_assert_uimm_bits!(MASK, 8);
24711 let r: __m512d = _mm512_shuffle_f64x2::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
24713 }
24714}
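
// Illustrative sketch only (hypothetical helper, not part of the test suite):
// shows the writemask merge behaviour — lanes are shuffled as usual, then each
// 64-bit element whose mask bit is clear is taken from `src` instead.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_mask_shuffle_f64x2() -> __m512d {
    let src = _mm512_set1_pd(-1.0);
    let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
    let b = _mm512_setr_pd(8., 9., 10., 11., 12., 13., 14., 15.);
    // MASK = 0 repeats a.lane0 and b.lane0: [0, 1, 0, 1, 8, 9, 8, 9]. With
    // k = 0b1010_1010 only the odd-indexed results are kept; even-indexed
    // elements come from `src`, giving [-1, 1, -1, 1, -1, 9, -1, 9].
    _mm512_mask_shuffle_f64x2::<0>(src, 0b1010_1010, a, b)
}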
24715
24716/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24717///
24718/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_f64x2&expand=5170)
24719#[inline]
24720#[target_feature(enable = "avx512f")]
24721#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24722#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b10_11_11_11))]
24723#[rustc_legacy_const_generics(3)]
24724pub fn _mm512_maskz_shuffle_f64x2<const MASK: i32>(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
24725 unsafe {
24726 static_assert_uimm_bits!(MASK, 8);
24727 let r: __m512d = _mm512_shuffle_f64x2::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
24729 }
24730}
24731
24732/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
24733///
24734/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_f64x2&expand=5168)
24735#[inline]
24736#[target_feature(enable = "avx512f,avx512vl")]
24737#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24738#[cfg_attr(test, assert_instr(vperm, MASK = 0b01))] //should be vshuff64x2
24739#[rustc_legacy_const_generics(2)]
24740pub fn _mm256_shuffle_f64x2<const MASK: i32>(a: __m256d, b: __m256d) -> __m256d {
24741 unsafe {
24742 static_assert_uimm_bits!(MASK, 8);
24743 let a: f64x4 = a.as_f64x4();
24744 let b: f64x4 = b.as_f64x4();
24745 let r: f64x4 = simd_shuffle!(
24746 a,
24747 b,
24748 [
24749 (MASK as u32 & 0b1) * 2 + 0,
24750 (MASK as u32 & 0b1) * 2 + 1,
24751 ((MASK as u32 >> 1) & 0b1) * 2 + 0 + 4,
24752 ((MASK as u32 >> 1) & 0b1) * 2 + 1 + 4,
24753 ],
24754 );
        transmute(r)
24756 }
24757}
24758
24759/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24760///
24761/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_f64x2&expand=5166)
24762#[inline]
24763#[target_feature(enable = "avx512f,avx512vl")]
24764#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24765#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b11))]
24766#[rustc_legacy_const_generics(4)]
24767pub fn _mm256_mask_shuffle_f64x2<const MASK: i32>(
24768 src: __m256d,
24769 k: __mmask8,
24770 a: __m256d,
24771 b: __m256d,
24772) -> __m256d {
24773 unsafe {
24774 static_assert_uimm_bits!(MASK, 8);
24775 let r: __m256d = _mm256_shuffle_f64x2::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
24777 }
24778}
24779
24780/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24781///
24782/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_f64x2&expand=5167)
24783#[inline]
24784#[target_feature(enable = "avx512f,avx512vl")]
24785#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24786#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b11))]
24787#[rustc_legacy_const_generics(3)]
24788pub fn _mm256_maskz_shuffle_f64x2<const MASK: i32>(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
24789 unsafe {
24790 static_assert_uimm_bits!(MASK, 8);
24791 let r: __m256d = _mm256_shuffle_f64x2::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO))
24793 }
24794}
24795
24796/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the result in dst.
24797///
24798/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extractf32x4_ps&expand=2442)
24799#[inline]
24800#[target_feature(enable = "avx512f")]
24801#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24802#[cfg_attr(
24803 all(test, not(target_env = "msvc")),
24804 assert_instr(vextractf32x4, IMM8 = 3)
24805)]
24806#[rustc_legacy_const_generics(1)]
24807pub fn _mm512_extractf32x4_ps<const IMM8: i32>(a: __m512) -> __m128 {
24808 unsafe {
24809 static_assert_uimm_bits!(IMM8, 2);
24810 match IMM8 & 0x3 {
24811 0 => simd_shuffle!(a, _mm512_undefined_ps(), [0, 1, 2, 3]),
24812 1 => simd_shuffle!(a, _mm512_undefined_ps(), [4, 5, 6, 7]),
24813 2 => simd_shuffle!(a, _mm512_undefined_ps(), [8, 9, 10, 11]),
24814 _ => simd_shuffle!(a, _mm512_undefined_ps(), [12, 13, 14, 15]),
24815 }
24816 }
24817}
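
// Illustrative sketch only (hypothetical helper, not part of the test suite):
// IMM8 simply names one of the four 128-bit lanes of the source vector.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_extractf32x4() -> __m128 {
    let a = _mm512_setr_ps(
        0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
    );
    // IMM8 = 2 extracts the third 128-bit lane: [8, 9, 10, 11].
    _mm512_extractf32x4_ps::<2>(a)
}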
24818
24819/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24820///
24821/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extractf32x4_ps&expand=2443)
24822#[inline]
24823#[target_feature(enable = "avx512f")]
24824#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24825#[cfg_attr(
24826 all(test, not(target_env = "msvc")),
24827 assert_instr(vextractf32x4, IMM8 = 3)
24828)]
24829#[rustc_legacy_const_generics(3)]
24830pub fn _mm512_mask_extractf32x4_ps<const IMM8: i32>(src: __m128, k: __mmask8, a: __m512) -> __m128 {
24831 unsafe {
24832 static_assert_uimm_bits!(IMM8, 2);
24833 let r: __m128 = _mm512_extractf32x4_ps::<IMM8>(a);
        transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4()))
24835 }
24836}
24837
24838/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24839///
24840/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extractf32x4_ps&expand=2444)
24841#[inline]
24842#[target_feature(enable = "avx512f")]
24843#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24844#[cfg_attr(
24845 all(test, not(target_env = "msvc")),
24846 assert_instr(vextractf32x4, IMM8 = 3)
24847)]
24848#[rustc_legacy_const_generics(2)]
24849pub fn _mm512_maskz_extractf32x4_ps<const IMM8: i32>(k: __mmask8, a: __m512) -> __m128 {
24850 unsafe {
24851 static_assert_uimm_bits!(IMM8, 2);
24852 let r: __m128 = _mm512_extractf32x4_ps::<IMM8>(a);
        transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO))
24854 }
24855}
24856
24857/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the result in dst.
24858///
24859/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extractf32x4_ps&expand=2439)
24860#[inline]
24861#[target_feature(enable = "avx512f,avx512vl")]
24862#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24863#[cfg_attr(
24864 all(test, not(target_env = "msvc")),
24865 assert_instr(vextract, IMM8 = 1) //should be vextractf32x4
24866)]
24867#[rustc_legacy_const_generics(1)]
24868pub fn _mm256_extractf32x4_ps<const IMM8: i32>(a: __m256) -> __m128 {
24869 unsafe {
24870 static_assert_uimm_bits!(IMM8, 1);
24871 match IMM8 & 0x1 {
24872 0 => simd_shuffle!(a, _mm256_undefined_ps(), [0, 1, 2, 3]),
24873 _ => simd_shuffle!(a, _mm256_undefined_ps(), [4, 5, 6, 7]),
24874 }
24875 }
24876}
24877
24878/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24879///
24880/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_extractf32x4_ps&expand=2440)
24881#[inline]
24882#[target_feature(enable = "avx512f,avx512vl")]
24883#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24884#[cfg_attr(
24885 all(test, not(target_env = "msvc")),
24886 assert_instr(vextractf32x4, IMM8 = 1)
24887)]
24888#[rustc_legacy_const_generics(3)]
24889pub fn _mm256_mask_extractf32x4_ps<const IMM8: i32>(src: __m128, k: __mmask8, a: __m256) -> __m128 {
24890 unsafe {
24891 static_assert_uimm_bits!(IMM8, 1);
24892 let r: __m128 = _mm256_extractf32x4_ps::<IMM8>(a);
        transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4()))
24894 }
24895}
24896
24897/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24898///
24899/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_extractf32x4_ps&expand=2441)
24900#[inline]
24901#[target_feature(enable = "avx512f,avx512vl")]
24902#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24903#[cfg_attr(
24904 all(test, not(target_env = "msvc")),
24905 assert_instr(vextractf32x4, IMM8 = 1)
24906)]
24907#[rustc_legacy_const_generics(2)]
24908pub fn _mm256_maskz_extractf32x4_ps<const IMM8: i32>(k: __mmask8, a: __m256) -> __m128 {
24909 unsafe {
24910 static_assert_uimm_bits!(IMM8, 1);
24911 let r: __m128 = _mm256_extractf32x4_ps::<IMM8>(a);
        transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO))
24913 }
24914}
24915
24916/// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with IMM1, and store the result in dst.
24917///
24918/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extracti64x4_epi64&expand=2473)
24919#[inline]
24920#[target_feature(enable = "avx512f")]
24921#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24922#[cfg_attr(
24923 all(test, not(target_env = "msvc")),
24924 assert_instr(vextractf64x4, IMM1 = 1) //should be vextracti64x4
24925)]
24926#[rustc_legacy_const_generics(1)]
24927pub fn _mm512_extracti64x4_epi64<const IMM1: i32>(a: __m512i) -> __m256i {
24928 unsafe {
24929 static_assert_uimm_bits!(IMM1, 1);
24930 match IMM1 {
24931 0 => simd_shuffle!(a, _mm512_setzero_si512(), [0, 1, 2, 3]),
24932 _ => simd_shuffle!(a, _mm512_setzero_si512(), [4, 5, 6, 7]),
24933 }
24934 }
24935}
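
// Illustrative sketch only (hypothetical helper, not part of the test suite):
// IMM1 selects the lower (0) or upper (1) 256-bit half of the source vector.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_extracti64x4() -> __m256i {
    let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
    // IMM1 = 1 extracts the upper half: [4, 5, 6, 7].
    _mm512_extracti64x4_epi64::<1>(a)
}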
24936
24937/// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with IMM1, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24938///
24939/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extracti64x4_epi64&expand=2474)
24940#[inline]
24941#[target_feature(enable = "avx512f")]
24942#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24943#[cfg_attr(
24944 all(test, not(target_env = "msvc")),
24945 assert_instr(vextracti64x4, IMM1 = 1)
24946)]
24947#[rustc_legacy_const_generics(3)]
24948pub fn _mm512_mask_extracti64x4_epi64<const IMM1: i32>(
24949 src: __m256i,
24950 k: __mmask8,
24951 a: __m512i,
24952) -> __m256i {
24953 unsafe {
24954 static_assert_uimm_bits!(IMM1, 1);
24955 let r: __m256i = _mm512_extracti64x4_epi64::<IMM1>(a);
        transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4()))
24957 }
24958}
24959
24960/// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with IMM1, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24961///
24962/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extracti64x4_epi64&expand=2475)
24963#[inline]
24964#[target_feature(enable = "avx512f")]
24965#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24966#[cfg_attr(
24967 all(test, not(target_env = "msvc")),
24968 assert_instr(vextracti64x4, IMM1 = 1)
24969)]
24970#[rustc_legacy_const_generics(2)]
24971pub fn _mm512_maskz_extracti64x4_epi64<const IMM1: i32>(k: __mmask8, a: __m512i) -> __m256i {
24972 unsafe {
24973 static_assert_uimm_bits!(IMM1, 1);
24974 let r: __m256i = _mm512_extracti64x4_epi64::<IMM1>(a);
        transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO))
24976 }
24977}
24978
24979/// Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from a, selected with imm8, and store the result in dst.
24980///
24981/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extractf64x4_pd&expand=2454)
24982#[inline]
24983#[target_feature(enable = "avx512f")]
24984#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24985#[cfg_attr(
24986 all(test, not(target_env = "msvc")),
24987 assert_instr(vextractf64x4, IMM8 = 1)
24988)]
24989#[rustc_legacy_const_generics(1)]
24990pub fn _mm512_extractf64x4_pd<const IMM8: i32>(a: __m512d) -> __m256d {
24991 unsafe {
24992 static_assert_uimm_bits!(IMM8, 1);
24993 match IMM8 & 0x1 {
24994 0 => simd_shuffle!(a, _mm512_undefined_pd(), [0, 1, 2, 3]),
24995 _ => simd_shuffle!(a, _mm512_undefined_pd(), [4, 5, 6, 7]),
24996 }
24997 }
24998}
24999
25000/// Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from a, selected with imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25001///
25002/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extractf64x4_pd&expand=2455)
25003#[inline]
25004#[target_feature(enable = "avx512f")]
25005#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25006#[cfg_attr(
25007 all(test, not(target_env = "msvc")),
25008 assert_instr(vextractf64x4, IMM8 = 1)
25009)]
25010#[rustc_legacy_const_generics(3)]
25011pub fn _mm512_mask_extractf64x4_pd<const IMM8: i32>(
25012 src: __m256d,
25013 k: __mmask8,
25014 a: __m512d,
25015) -> __m256d {
25016 unsafe {
25017 static_assert_uimm_bits!(IMM8, 1);
25018 let r: __m256d = _mm512_extractf64x4_pd::<IMM8>(a);
        transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
25020 }
25021}
25022
25023/// Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25024///
25025/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extractf64x4_pd&expand=2456)
25026#[inline]
25027#[target_feature(enable = "avx512f")]
25028#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25029#[cfg_attr(
25030 all(test, not(target_env = "msvc")),
25031 assert_instr(vextractf64x4, IMM8 = 1)
25032)]
25033#[rustc_legacy_const_generics(2)]
25034pub fn _mm512_maskz_extractf64x4_pd<const IMM8: i32>(k: __mmask8, a: __m512d) -> __m256d {
25035 unsafe {
25036 static_assert_uimm_bits!(IMM8, 1);
25037 let r: __m256d = _mm512_extractf64x4_pd::<IMM8>(a);
        transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO))
25039 }
25040}
25041
25042/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM2, and store the result in dst.
25043///
25044/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extracti32x4_epi32&expand=2461)
25045#[inline]
25046#[target_feature(enable = "avx512f")]
25047#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25048#[cfg_attr(
25049 all(test, not(target_env = "msvc")),
25050 assert_instr(vextractf32x4, IMM2 = 3) //should be vextracti32x4
25051)]
25052#[rustc_legacy_const_generics(1)]
25053pub fn _mm512_extracti32x4_epi32<const IMM2: i32>(a: __m512i) -> __m128i {
25054 unsafe {
25055 static_assert_uimm_bits!(IMM2, 2);
25056 let a: i32x16 = a.as_i32x16();
25057 let zero: i32x16 = i32x16::ZERO;
25058 let extract: i32x4 = match IMM2 {
25059 0 => simd_shuffle!(a, zero, [0, 1, 2, 3]),
25060 1 => simd_shuffle!(a, zero, [4, 5, 6, 7]),
25061 2 => simd_shuffle!(a, zero, [8, 9, 10, 11]),
25062 _ => simd_shuffle!(a, zero, [12, 13, 14, 15]),
25063 };
        transmute(extract)
25065 }
25066}
25067
25068/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM2, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25069///
25070/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extracti32x4_epi32&expand=2462)
25071#[inline]
25072#[target_feature(enable = "avx512f")]
25073#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25074#[cfg_attr(
25075 all(test, not(target_env = "msvc")),
25076 assert_instr(vextracti32x4, IMM2 = 3)
25077)]
25078#[rustc_legacy_const_generics(3)]
25079pub fn _mm512_mask_extracti32x4_epi32<const IMM2: i32>(
25080 src: __m128i,
25081 k: __mmask8,
25082 a: __m512i,
25083) -> __m128i {
25084 unsafe {
25085 static_assert_uimm_bits!(IMM2, 2);
25086 let r: __m128i = _mm512_extracti32x4_epi32::<IMM2>(a);
        transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4()))
25088 }
25089}
25090
25091/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM2, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25092///
25093/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extracti32x4_epi32&expand=2463)
25094#[inline]
25095#[target_feature(enable = "avx512f")]
25096#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25097#[cfg_attr(
25098 all(test, not(target_env = "msvc")),
25099 assert_instr(vextracti32x4, IMM2 = 3)
25100)]
25101#[rustc_legacy_const_generics(2)]
25102pub fn _mm512_maskz_extracti32x4_epi32<const IMM2: i32>(k: __mmask8, a: __m512i) -> __m128i {
25103 unsafe {
25104 static_assert_uimm_bits!(IMM2, 2);
25105 let r: __m128i = _mm512_extracti32x4_epi32::<IMM2>(a);
        transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO))
25107 }
25108}
25109
25110/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM1, and store the result in dst.
25111///
25112/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extracti32x4_epi32&expand=2458)
25113#[inline]
25114#[target_feature(enable = "avx512f,avx512vl")]
25115#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25116#[cfg_attr(
25117 all(test, not(target_env = "msvc")),
25118 assert_instr(vextract, IMM1 = 1) //should be vextracti32x4
25119)]
25120#[rustc_legacy_const_generics(1)]
25121pub fn _mm256_extracti32x4_epi32<const IMM1: i32>(a: __m256i) -> __m128i {
25122 unsafe {
25123 static_assert_uimm_bits!(IMM1, 1);
25124 let a: i32x8 = a.as_i32x8();
25125 let zero: i32x8 = i32x8::ZERO;
25126 let extract: i32x4 = match IMM1 {
25127 0 => simd_shuffle!(a, zero, [0, 1, 2, 3]),
25128 _ => simd_shuffle!(a, zero, [4, 5, 6, 7]),
25129 };
        transmute(extract)
25131 }
25132}
25133
25134/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM1, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25135///
25136/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_extracti32x4_epi32&expand=2459)
25137#[inline]
25138#[target_feature(enable = "avx512f,avx512vl")]
25139#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25140#[cfg_attr(
25141 all(test, not(target_env = "msvc")),
25142 assert_instr(vextracti32x4, IMM1 = 1)
25143)]
25144#[rustc_legacy_const_generics(3)]
25145pub fn _mm256_mask_extracti32x4_epi32<const IMM1: i32>(
25146 src: __m128i,
25147 k: __mmask8,
25148 a: __m256i,
25149) -> __m128i {
25150 unsafe {
25151 static_assert_uimm_bits!(IMM1, 1);
25152 let r: __m128i = _mm256_extracti32x4_epi32::<IMM1>(a);
        transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4()))
25154 }
25155}
25156
25157/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM1, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25158///
25159/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_extracti32x4_epi32&expand=2460)
25160#[inline]
25161#[target_feature(enable = "avx512f,avx512vl")]
25162#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25163#[cfg_attr(
25164 all(test, not(target_env = "msvc")),
25165 assert_instr(vextracti32x4, IMM1 = 1)
25166)]
25167#[rustc_legacy_const_generics(2)]
25168pub fn _mm256_maskz_extracti32x4_epi32<const IMM1: i32>(k: __mmask8, a: __m256i) -> __m128i {
25169 unsafe {
25170 static_assert_uimm_bits!(IMM1, 1);
25171 let r: __m128i = _mm256_extracti32x4_epi32::<IMM1>(a);
        transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO))
25173 }
25174}
25175
25176/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst.
25177///
25178/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_moveldup_ps&expand=3862)
25179#[inline]
25180#[target_feature(enable = "avx512f")]
25181#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25182#[cfg_attr(test, assert_instr(vmovsldup))]
25183pub fn _mm512_moveldup_ps(a: __m512) -> __m512 {
25184 unsafe {
25185 let r: f32x16 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
        transmute(r)
25187 }
25188}
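
// Illustrative sketch only (hypothetical helper, not part of the test suite):
// every even-indexed element is duplicated into the odd slot that follows it.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_moveldup() -> __m512 {
    let a = _mm512_setr_ps(
        0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
    );
    // result = [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]
    _mm512_moveldup_ps(a)
}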
25189
25190/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25191///
25192/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_moveldup_ps&expand=3860)
25193#[inline]
25194#[target_feature(enable = "avx512f")]
25195#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25196#[cfg_attr(test, assert_instr(vmovsldup))]
25197pub fn _mm512_mask_moveldup_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
25198 unsafe {
25199 let mov: f32x16 =
25200 simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
        transmute(simd_select_bitmask(k, mov, src.as_f32x16()))
25202 }
25203}
25204
25205/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25206///
25207/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_moveldup_ps&expand=3861)
25208#[inline]
25209#[target_feature(enable = "avx512f")]
25210#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25211#[cfg_attr(test, assert_instr(vmovsldup))]
25212pub fn _mm512_maskz_moveldup_ps(k: __mmask16, a: __m512) -> __m512 {
25213 unsafe {
25214 let mov: f32x16 =
25215 simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
        transmute(simd_select_bitmask(k, mov, f32x16::ZERO))
25217 }
25218}
25219
25220/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25221///
25222/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_moveldup_ps&expand=3857)
25223#[inline]
25224#[target_feature(enable = "avx512f,avx512vl")]
25225#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25226#[cfg_attr(test, assert_instr(vmovsldup))]
25227pub fn _mm256_mask_moveldup_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
25228 unsafe {
25229 let mov: __m256 = _mm256_moveldup_ps(a);
        transmute(simd_select_bitmask(k, mov.as_f32x8(), src.as_f32x8()))
25231 }
25232}
25233
25234/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25235///
25236/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_moveldup_ps&expand=3858)
25237#[inline]
25238#[target_feature(enable = "avx512f,avx512vl")]
25239#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25240#[cfg_attr(test, assert_instr(vmovsldup))]
25241pub fn _mm256_maskz_moveldup_ps(k: __mmask8, a: __m256) -> __m256 {
25242 unsafe {
25243 let mov: __m256 = _mm256_moveldup_ps(a);
        transmute(simd_select_bitmask(k, mov.as_f32x8(), f32x8::ZERO))
25245 }
25246}
25247
25248/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25249///
25250/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_moveldup_ps&expand=3854)
25251#[inline]
25252#[target_feature(enable = "avx512f,avx512vl")]
25253#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25254#[cfg_attr(test, assert_instr(vmovsldup))]
25255pub fn _mm_mask_moveldup_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
25256 unsafe {
25257 let mov: __m128 = _mm_moveldup_ps(a);
        transmute(simd_select_bitmask(k, mov.as_f32x4(), src.as_f32x4()))
25259 }
25260}
25261
25262/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25263///
25264/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_moveldup_ps&expand=3855)
25265#[inline]
25266#[target_feature(enable = "avx512f,avx512vl")]
25267#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25268#[cfg_attr(test, assert_instr(vmovsldup))]
25269pub fn _mm_maskz_moveldup_ps(k: __mmask8, a: __m128) -> __m128 {
25270 unsafe {
25271 let mov: __m128 = _mm_moveldup_ps(a);
        transmute(simd_select_bitmask(k, mov.as_f32x4(), f32x4::ZERO))
25273 }
25274}
25275
25276/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst.
25277///
25278/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movehdup_ps&expand=3852)
25279#[inline]
25280#[target_feature(enable = "avx512f")]
25281#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25282#[cfg_attr(test, assert_instr(vmovshdup))]
25283pub fn _mm512_movehdup_ps(a: __m512) -> __m512 {
25284 unsafe {
25285 let r: f32x16 = simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
        transmute(r)
25287 }
25288}
25289
25290/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25291///
25292/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_movehdup_ps&expand=3850)
25293#[inline]
25294#[target_feature(enable = "avx512f")]
25295#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25296#[cfg_attr(test, assert_instr(vmovshdup))]
25297pub fn _mm512_mask_movehdup_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
25298 unsafe {
25299 let mov: f32x16 =
25300 simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
        transmute(simd_select_bitmask(k, mov, src.as_f32x16()))
25302 }
25303}
25304
25305/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25306///
25307/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_movehdup_ps&expand=3851)
25308#[inline]
25309#[target_feature(enable = "avx512f")]
25310#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25311#[cfg_attr(test, assert_instr(vmovshdup))]
25312pub fn _mm512_maskz_movehdup_ps(k: __mmask16, a: __m512) -> __m512 {
25313 unsafe {
25314 let mov: f32x16 =
25315 simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
        transmute(simd_select_bitmask(k, mov, f32x16::ZERO))
25317 }
25318}
25319
25320/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25321///
25322/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_movehdup_ps&expand=3847)
25323#[inline]
25324#[target_feature(enable = "avx512f,avx512vl")]
25325#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25326#[cfg_attr(test, assert_instr(vmovshdup))]
25327pub fn _mm256_mask_movehdup_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
25328 unsafe {
25329 let mov: __m256 = _mm256_movehdup_ps(a);
        transmute(simd_select_bitmask(k, mov.as_f32x8(), src.as_f32x8()))
25331 }
25332}
25333
25334/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25335///
25336/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_movehdup_ps&expand=3848)
25337#[inline]
25338#[target_feature(enable = "avx512f,avx512vl")]
25339#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25340#[cfg_attr(test, assert_instr(vmovshdup))]
25341pub fn _mm256_maskz_movehdup_ps(k: __mmask8, a: __m256) -> __m256 {
25342 unsafe {
25343 let mov: __m256 = _mm256_movehdup_ps(a);
        transmute(simd_select_bitmask(k, mov.as_f32x8(), f32x8::ZERO))
25345 }
25346}
25347
25348/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25349///
25350/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_movehdup_ps&expand=3844)
25351#[inline]
25352#[target_feature(enable = "avx512f,avx512vl")]
25353#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25354#[cfg_attr(test, assert_instr(vmovshdup))]
25355pub fn _mm_mask_movehdup_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
25356 unsafe {
25357 let mov: __m128 = _mm_movehdup_ps(a);
        transmute(simd_select_bitmask(k, mov.as_f32x4(), src.as_f32x4()))
25359 }
25360}
25361
25362/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25363///
25364/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_movehdup_ps&expand=3845)
25365#[inline]
25366#[target_feature(enable = "avx512f,avx512vl")]
25367#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25368#[cfg_attr(test, assert_instr(vmovshdup))]
25369pub fn _mm_maskz_movehdup_ps(k: __mmask8, a: __m128) -> __m128 {
25370 unsafe {
25371 let mov: __m128 = _mm_movehdup_ps(a);
        transmute(simd_select_bitmask(k, mov.as_f32x4(), f32x4::ZERO))
25373 }
25374}
25375
25376/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst.
25377///
25378/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movedup_pd&expand=3843)
25379#[inline]
25380#[target_feature(enable = "avx512f")]
25381#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25382#[cfg_attr(test, assert_instr(vmovddup))]
25383pub fn _mm512_movedup_pd(a: __m512d) -> __m512d {
25384 unsafe {
25385 let r: f64x8 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
        transmute(r)
25387 }
25388}
25389
25390/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25391///
25392/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_movedup_pd&expand=3841)
25393#[inline]
25394#[target_feature(enable = "avx512f")]
25395#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25396#[cfg_attr(test, assert_instr(vmovddup))]
25397pub fn _mm512_mask_movedup_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
25398 unsafe {
25399 let mov: f64x8 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
        transmute(simd_select_bitmask(k, mov, src.as_f64x8()))
25401 }
25402}
25403
25404/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25405///
25406/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_movedup_pd&expand=3842)
25407#[inline]
25408#[target_feature(enable = "avx512f")]
25409#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25410#[cfg_attr(test, assert_instr(vmovddup))]
25411pub fn _mm512_maskz_movedup_pd(k: __mmask8, a: __m512d) -> __m512d {
25412 unsafe {
25413 let mov: f64x8 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
        transmute(simd_select_bitmask(k, mov, f64x8::ZERO))
25415 }
25416}
25417
25418/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25419///
25420/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_movedup_pd&expand=3838)
25421#[inline]
25422#[target_feature(enable = "avx512f,avx512vl")]
25423#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25424#[cfg_attr(test, assert_instr(vmovddup))]
25425pub fn _mm256_mask_movedup_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
25426 unsafe {
25427 let mov: __m256d = _mm256_movedup_pd(a);
        transmute(simd_select_bitmask(k, mov.as_f64x4(), src.as_f64x4()))
25429 }
25430}
25431
25432/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25433///
25434/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_movedup_pd&expand=3839)
25435#[inline]
25436#[target_feature(enable = "avx512f,avx512vl")]
25437#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25438#[cfg_attr(test, assert_instr(vmovddup))]
25439pub fn _mm256_maskz_movedup_pd(k: __mmask8, a: __m256d) -> __m256d {
25440 unsafe {
25441 let mov: __m256d = _mm256_movedup_pd(a);
        transmute(simd_select_bitmask(k, mov.as_f64x4(), f64x4::ZERO))
25443 }
25444}
25445
25446/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25447///
25448/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_movedup_pd&expand=3835)
25449#[inline]
25450#[target_feature(enable = "avx512f,avx512vl")]
25451#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25452#[cfg_attr(test, assert_instr(vmovddup))]
25453pub fn _mm_mask_movedup_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
25454 unsafe {
25455 let mov: __m128d = _mm_movedup_pd(a);
        transmute(simd_select_bitmask(k, mov.as_f64x2(), src.as_f64x2()))
25457 }
25458}
25459
25460/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25461///
25462/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_movedup_pd&expand=3836)
25463#[inline]
25464#[target_feature(enable = "avx512f,avx512vl")]
25465#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25466#[cfg_attr(test, assert_instr(vmovddup))]
25467pub fn _mm_maskz_movedup_pd(k: __mmask8, a: __m128d) -> __m128d {
25468 unsafe {
25469 let mov: __m128d = _mm_movedup_pd(a);
        transmute(simd_select_bitmask(k, mov.as_f64x2(), f64x2::ZERO))
25471 }
25472}
25473
25474/// Copy a to dst, then insert 128 bits (composed of 4 packed 32-bit integers) from b into dst at the location specified by imm8.
25475///
25476/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_inserti32x4&expand=3174)
25477#[inline]
25478#[target_feature(enable = "avx512f")]
25479#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25480#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))] //should be vinserti32x4
25481#[rustc_legacy_const_generics(2)]
25482pub fn _mm512_inserti32x4<const IMM8: i32>(a: __m512i, b: __m128i) -> __m512i {
25483 unsafe {
25484 static_assert_uimm_bits!(IMM8, 2);
25485 let a = a.as_i32x16();
25486 let b = _mm512_castsi128_si512(b).as_i32x16();
25487 let ret: i32x16 = match IMM8 & 0b11 {
25488 0 => {
25489 simd_shuffle!(
25490 a,
25491 b,
25492 [16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
25493 )
25494 }
25495 1 => {
25496 simd_shuffle!(
25497 a,
25498 b,
25499 [0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15],
25500 )
25501 }
25502 2 => {
25503 simd_shuffle!(
25504 a,
25505 b,
25506 [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 12, 13, 14, 15],
25507 )
25508 }
25509 _ => {
25510 simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19])
25511 }
25512 };
25513 transmute(ret)
25514 }
25515}
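
// Illustrative sketch only (hypothetical helper, not part of the test suite):
// IMM8 names the 128-bit lane of `a` that is replaced by `b`; the other lanes
// are passed through unchanged. The `sse2` feature is enabled here solely for
// the `_mm_setr_epi32` helper used to build `b`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,sse2")]
fn _example_inserti32x4() -> __m512i {
    let a = _mm512_setzero_si512();
    let b = _mm_setr_epi32(1, 2, 3, 4);
    // IMM8 = 3 places [1, 2, 3, 4] into the topmost 128-bit lane of `a`.
    _mm512_inserti32x4::<3>(a, b)
}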
25516
25517/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25518///
25519/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_inserti32x4&expand=3175)
25520#[inline]
25521#[target_feature(enable = "avx512f")]
25522#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25523#[cfg_attr(test, assert_instr(vinserti32x4, IMM8 = 2))]
25524#[rustc_legacy_const_generics(4)]
25525pub fn _mm512_mask_inserti32x4<const IMM8: i32>(
25526 src: __m512i,
25527 k: __mmask16,
25528 a: __m512i,
25529 b: __m128i,
25530) -> __m512i {
25531 unsafe {
25532 static_assert_uimm_bits!(IMM8, 2);
25533 let r: __m512i = _mm512_inserti32x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16()))
25535 }
25536}
25537
25538/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25539///
25540/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_inserti32x4&expand=3176)
25541#[inline]
25542#[target_feature(enable = "avx512f")]
25543#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25544#[cfg_attr(test, assert_instr(vinserti32x4, IMM8 = 2))]
25545#[rustc_legacy_const_generics(3)]
25546pub fn _mm512_maskz_inserti32x4<const IMM8: i32>(k: __mmask16, a: __m512i, b: __m128i) -> __m512i {
25547 unsafe {
25548 static_assert_uimm_bits!(IMM8, 2);
25549 let r: __m512i = _mm512_inserti32x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO))
25551 }
25552}
25553
25554/// Copy a to dst, then insert 128 bits (composed of 4 packed 32-bit integers) from b into dst at the location specified by imm8.
25555///
25556/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_inserti32x4&expand=3171)
25557#[inline]
25558#[target_feature(enable = "avx512f,avx512vl")]
25559#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25560#[cfg_attr(
25561 all(test, not(target_env = "msvc")),
25562 assert_instr(vinsert, IMM8 = 1) //should be vinserti32x4
25563)]
25564#[rustc_legacy_const_generics(2)]
25565pub fn _mm256_inserti32x4<const IMM8: i32>(a: __m256i, b: __m128i) -> __m256i {
25566 unsafe {
25567 static_assert_uimm_bits!(IMM8, 1);
25568 let a: i32x8 = a.as_i32x8();
25569 let b: i32x8 = _mm256_castsi128_si256(b).as_i32x8();
25570 let ret: i32x8 = match IMM8 & 0b1 {
25571 0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
25572 _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
25573 };
        transmute(ret)
25575 }
25576}
25577
25578/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25579///
25580/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_inserti32x4&expand=3172)
25581#[inline]
25582#[target_feature(enable = "avx512f,avx512vl")]
25583#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25584#[cfg_attr(
25585 all(test, not(target_env = "msvc")),
25586 assert_instr(vinserti32x4, IMM8 = 1)
25587)]
25588#[rustc_legacy_const_generics(4)]
25589pub fn _mm256_mask_inserti32x4<const IMM8: i32>(
25590 src: __m256i,
25591 k: __mmask8,
25592 a: __m256i,
25593 b: __m128i,
25594) -> __m256i {
25595 unsafe {
25596 static_assert_uimm_bits!(IMM8, 1);
25597 let r: __m256i = _mm256_inserti32x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8()))
25599 }
25600}
25601
25602/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25603///
25604/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_inserti32x4&expand=3173)
25605#[inline]
25606#[target_feature(enable = "avx512f,avx512vl")]
25607#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25608#[cfg_attr(
25609 all(test, not(target_env = "msvc")),
25610 assert_instr(vinserti32x4, IMM8 = 1)
25611)]
25612#[rustc_legacy_const_generics(3)]
25613pub fn _mm256_maskz_inserti32x4<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m128i) -> __m256i {
25614 unsafe {
25615 static_assert_uimm_bits!(IMM8, 1);
25616 let r: __m256i = _mm256_inserti32x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO))
25618 }
25619}
25620
25621/// Copy a to dst, then insert 256 bits (composed of 4 packed 64-bit integers) from b into dst at the location specified by imm8.
25622///
25623/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_inserti64x4&expand=3186)
25624#[inline]
25625#[target_feature(enable = "avx512f")]
25626#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25627#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))] //should be vinserti64x4
25628#[rustc_legacy_const_generics(2)]
25629pub fn _mm512_inserti64x4<const IMM8: i32>(a: __m512i, b: __m256i) -> __m512i {
25630 unsafe {
25631 static_assert_uimm_bits!(IMM8, 1);
25632 let b: __m512i = _mm512_castsi256_si512(b);
25633 match IMM8 & 0b1 {
25634 0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
25635 _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
25636 }
25637 }
25638}
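
// Illustrative sketch only (hypothetical helper, not part of the test suite):
// IMM8 = 0 replaces the lower 256-bit half with `b`, IMM8 = 1 the upper half.
// The `avx` feature is enabled here solely for the `_mm256_setr_epi64x` helper.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx")]
fn _example_inserti64x4() -> __m512i {
    let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
    let b = _mm256_setr_epi64x(10, 11, 12, 13);
    // IMM8 = 1 gives [0, 1, 2, 3, 10, 11, 12, 13].
    _mm512_inserti64x4::<1>(a, b)
}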
25639
25640/// Copy a to tmp, then insert 256 bits (composed of 4 packed 64-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25641///
25642/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_inserti64x4&expand=3187)
25643#[inline]
25644#[target_feature(enable = "avx512f")]
25645#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25646#[cfg_attr(test, assert_instr(vinserti64x4, IMM8 = 1))]
25647#[rustc_legacy_const_generics(4)]
25648pub fn _mm512_mask_inserti64x4<const IMM8: i32>(
25649 src: __m512i,
25650 k: __mmask8,
25651 a: __m512i,
25652 b: __m256i,
25653) -> __m512i {
25654 unsafe {
25655 static_assert_uimm_bits!(IMM8, 1);
25656 let r: __m512i = _mm512_inserti64x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8()))
25658 }
25659}
25660
25661/// Copy a to tmp, then insert 256 bits (composed of 4 packed 64-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25662///
25663/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_inserti64x4&expand=3188)
25664#[inline]
25665#[target_feature(enable = "avx512f")]
25666#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25667#[cfg_attr(test, assert_instr(vinserti64x4, IMM8 = 1))]
25668#[rustc_legacy_const_generics(3)]
25669pub fn _mm512_maskz_inserti64x4<const IMM8: i32>(k: __mmask8, a: __m512i, b: __m256i) -> __m512i {
25670 unsafe {
25671 static_assert_uimm_bits!(IMM8, 1);
25672 let r: __m512i = _mm512_inserti64x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO))
25674 }
25675}
25676
25677/// Copy a to dst, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into dst at the location specified by imm8.
25678///
25679/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_insertf32x4&expand=3155)
25680#[inline]
25681#[target_feature(enable = "avx512f")]
25682#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25683#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))]
25684#[rustc_legacy_const_generics(2)]
25685pub fn _mm512_insertf32x4<const IMM8: i32>(a: __m512, b: __m128) -> __m512 {
25686 unsafe {
25687 static_assert_uimm_bits!(IMM8, 2);
25688 let b = _mm512_castps128_ps512(b);
25689 match IMM8 & 0b11 {
25690 0 => {
25691 simd_shuffle!(
25692 a,
25693 b,
25694 [16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
25695 )
25696 }
25697 1 => {
25698 simd_shuffle!(
25699 a,
25700 b,
25701 [0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15],
25702 )
25703 }
25704 2 => {
25705 simd_shuffle!(
25706 a,
25707 b,
25708 [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 12, 13, 14, 15],
25709 )
25710 }
25711 _ => {
25712 simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19])
25713 }
25714 }
25715 }
25716}
25717
25718/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25719///
25720/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_insertf32x4&expand=3156)
25721#[inline]
25722#[target_feature(enable = "avx512f")]
25723#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25724#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))]
25725#[rustc_legacy_const_generics(4)]
25726pub fn _mm512_mask_insertf32x4<const IMM8: i32>(
25727 src: __m512,
25728 k: __mmask16,
25729 a: __m512,
25730 b: __m128,
25731) -> __m512 {
25732 unsafe {
25733 static_assert_uimm_bits!(IMM8, 2);
25734 let r: __m512 = _mm512_insertf32x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16()))
25736 }
25737}
25738
25739/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25740///
25741/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_insertf32x4&expand=3157)
25742#[inline]
25743#[target_feature(enable = "avx512f")]
25744#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25745#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))]
25746#[rustc_legacy_const_generics(3)]
25747pub fn _mm512_maskz_insertf32x4<const IMM8: i32>(k: __mmask16, a: __m512, b: __m128) -> __m512 {
25748 unsafe {
25749 static_assert_uimm_bits!(IMM8, 2);
25750 let r: __m512 = _mm512_insertf32x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO))
25752 }
25753}
25754
25755/// Copy a to dst, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into dst at the location specified by imm8.
25756///
25757/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_insertf32x4&expand=3152)
25758#[inline]
25759#[target_feature(enable = "avx512f,avx512vl")]
25760#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25761#[cfg_attr(
25762 all(test, not(target_env = "msvc")),
25763 assert_instr(vinsert, IMM8 = 1) //should be vinsertf32x4
25764)]
25765#[rustc_legacy_const_generics(2)]
25766pub fn _mm256_insertf32x4<const IMM8: i32>(a: __m256, b: __m128) -> __m256 {
25767 unsafe {
25768 static_assert_uimm_bits!(IMM8, 1);
25769 let b: __m256 = _mm256_castps128_ps256(b);
25770 match IMM8 & 0b1 {
25771 0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
25772 _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
25773 }
25774 }
25775}
25776
25777/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25778///
25779/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_insertf32x4&expand=3153)
25780#[inline]
25781#[target_feature(enable = "avx512f,avx512vl")]
25782#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25783#[cfg_attr(
25784 all(test, not(target_env = "msvc")),
25785 assert_instr(vinsertf32x4, IMM8 = 1)
25786)]
25787#[rustc_legacy_const_generics(4)]
25788pub fn _mm256_mask_insertf32x4<const IMM8: i32>(
25789 src: __m256,
25790 k: __mmask8,
25791 a: __m256,
25792 b: __m128,
25793) -> __m256 {
25794 unsafe {
25795 static_assert_uimm_bits!(IMM8, 1);
25796 let r: __m256 = _mm256_insertf32x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8()))
25798 }
25799}
25800
25801/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25802///
25803/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_insertf32x4&expand=3154)
25804#[inline]
25805#[target_feature(enable = "avx512f,avx512vl")]
25806#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25807#[cfg_attr(
25808 all(test, not(target_env = "msvc")),
25809 assert_instr(vinsertf32x4, IMM8 = 1)
25810)]
25811#[rustc_legacy_const_generics(3)]
25812pub fn _mm256_maskz_insertf32x4<const IMM8: i32>(k: __mmask8, a: __m256, b: __m128) -> __m256 {
25813 unsafe {
25814 static_assert_uimm_bits!(IMM8, 1);
25815 let r: __m256 = _mm256_insertf32x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO))
25817 }
25818}
25819
25820/// Copy a to dst, then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from b into dst at the location specified by imm8.
25821///
25822/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_insertf64x4&expand=3167)
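///
/// A minimal sketch (illustrative only, not run as a doctest):
///
/// ```ignore
/// let a = _mm512_set1_pd(1.0);
/// let b = _mm256_set1_pd(2.0);
/// // IMM8 = 0 replaces the lower four doubles:
/// // [2.0, 2.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0].
/// let r = _mm512_insertf64x4::<0>(a, b);
/// ```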
25823#[inline]
25824#[target_feature(enable = "avx512f")]
25825#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25826#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))]
25827#[rustc_legacy_const_generics(2)]
25828pub fn _mm512_insertf64x4<const IMM8: i32>(a: __m512d, b: __m256d) -> __m512d {
25829 unsafe {
25830 static_assert_uimm_bits!(IMM8, 1);
25831 let b: __m512d = _mm512_castpd256_pd512(b);
25832 match IMM8 & 0b1 {
25833 0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
25834 _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
25835 }
25836 }
25837}
25838
25839/// Copy a to tmp, then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25840///
25841/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_insertf64x4&expand=3168)
25842#[inline]
25843#[target_feature(enable = "avx512f")]
25844#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25845#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))]
25846#[rustc_legacy_const_generics(4)]
25847pub fn _mm512_mask_insertf64x4<const IMM8: i32>(
25848 src: __m512d,
25849 k: __mmask8,
25850 a: __m512d,
25851 b: __m256d,
25852) -> __m512d {
25853 unsafe {
25854 static_assert_uimm_bits!(IMM8, 1);
25855 let r: __m512d = _mm512_insertf64x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
25857 }
25858}
25859
25860/// Copy a to tmp, then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25861///
25862/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_insertf64x4&expand=3169)
25863#[inline]
25864#[target_feature(enable = "avx512f")]
25865#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25866#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))]
25867#[rustc_legacy_const_generics(3)]
25868pub fn _mm512_maskz_insertf64x4<const IMM8: i32>(k: __mmask8, a: __m512d, b: __m256d) -> __m512d {
25869 unsafe {
25870 static_assert_uimm_bits!(IMM8, 1);
25871 let r: __m512d = _mm512_insertf64x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
25873 }
25874}
25875
25876/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst.
25877///
25878/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_epi32&expand=6021)
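///
/// A minimal sketch of the per-lane interleave (illustrative only, not run as a
/// doctest):
///
/// ```ignore
/// let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
/// let b = _mm512_setr_epi32(16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
/// // The two high elements of each 128-bit lane of `a` and `b` are interleaved:
/// // [2, 18, 3, 19, 6, 22, 7, 23, 10, 26, 11, 27, 14, 30, 15, 31].
/// let r = _mm512_unpackhi_epi32(a, b);
/// ```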
25879#[inline]
25880#[target_feature(enable = "avx512f")]
25881#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25882#[cfg_attr(test, assert_instr(vunpckhps))] //should be vpunpckhdq
25883pub fn _mm512_unpackhi_epi32(a: __m512i, b: __m512i) -> __m512i {
25884 unsafe {
25885 let a: i32x16 = a.as_i32x16();
25886 let b: i32x16 = b.as_i32x16();
25887 #[rustfmt::skip]
25888 let r: i32x16 = simd_shuffle!(
25889 a, b,
25890 [ 2, 18, 3, 19,
25891 2 + 4, 18 + 4, 3 + 4, 19 + 4,
25892 2 + 8, 18 + 8, 3 + 8, 19 + 8,
25893 2 + 12, 18 + 12, 3 + 12, 19 + 12],
25894 );
        transmute(r)
25896 }
25897}
25898
25899/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25900///
25901/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_epi32&expand=6019)
25902#[inline]
25903#[target_feature(enable = "avx512f")]
25904#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25905#[cfg_attr(test, assert_instr(vpunpckhdq))]
25906pub fn _mm512_mask_unpackhi_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
25907 unsafe {
25908 let unpackhi: i32x16 = _mm512_unpackhi_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, unpackhi, src.as_i32x16()))
25910 }
25911}
25912
25913/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25914///
25915/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_epi32&expand=6020)
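///
/// A hedged sketch of the zeromask behaviour (illustrative only, not run as a
/// doctest):
///
/// ```ignore
/// let a = _mm512_set1_epi32(7);
/// let b = _mm512_set1_epi32(9);
/// // Only lanes 0 and 1 keep the interleaved result (7 and 9); every other
/// // lane is zeroed out.
/// let r = _mm512_maskz_unpackhi_epi32(0b0000_0000_0000_0011, a, b);
/// ```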
25916#[inline]
25917#[target_feature(enable = "avx512f")]
25918#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25919#[cfg_attr(test, assert_instr(vpunpckhdq))]
25920pub fn _mm512_maskz_unpackhi_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
25921 unsafe {
25922 let unpackhi: i32x16 = _mm512_unpackhi_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, unpackhi, i32x16::ZERO))
25924 }
25925}
25926
25927/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25928///
25929/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_epi32&expand=6016)
25930#[inline]
25931#[target_feature(enable = "avx512f,avx512vl")]
25932#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25933#[cfg_attr(test, assert_instr(vpunpckhdq))]
25934pub fn _mm256_mask_unpackhi_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
25935 unsafe {
25936 let unpackhi: i32x8 = _mm256_unpackhi_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, unpackhi, src.as_i32x8()))
25938 }
25939}
25940
25941/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25942///
25943/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_epi32&expand=6017)
25944#[inline]
25945#[target_feature(enable = "avx512f,avx512vl")]
25946#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25947#[cfg_attr(test, assert_instr(vpunpckhdq))]
25948pub fn _mm256_maskz_unpackhi_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
25949 unsafe {
25950 let unpackhi: i32x8 = _mm256_unpackhi_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, unpackhi, i32x8::ZERO))
25952 }
25953}
25954
25955/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25956///
25957/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_epi32&expand=6013)
25958#[inline]
25959#[target_feature(enable = "avx512f,avx512vl")]
25960#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25961#[cfg_attr(test, assert_instr(vpunpckhdq))]
25962pub fn _mm_mask_unpackhi_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
25963 unsafe {
25964 let unpackhi: i32x4 = _mm_unpackhi_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, unpackhi, src.as_i32x4()))
25966 }
25967}
25968
25969/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25970///
25971/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_epi32&expand=6014)
25972#[inline]
25973#[target_feature(enable = "avx512f,avx512vl")]
25974#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25975#[cfg_attr(test, assert_instr(vpunpckhdq))]
25976pub fn _mm_maskz_unpackhi_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
25977 unsafe {
25978 let unpackhi: i32x4 = _mm_unpackhi_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, unpackhi, i32x4::ZERO))
25980 }
25981}
25982
25983/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst.
25984///
25985/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_epi64&expand=6030)
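///
/// A minimal sketch (illustrative only, not run as a doctest):
///
/// ```ignore
/// let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
/// let b = _mm512_setr_epi64(8, 9, 10, 11, 12, 13, 14, 15);
/// // The high 64-bit element of each 128-bit lane is taken from `a` then `b`:
/// // [1, 9, 3, 11, 5, 13, 7, 15].
/// let r = _mm512_unpackhi_epi64(a, b);
/// ```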
25986#[inline]
25987#[target_feature(enable = "avx512f")]
25988#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25989#[cfg_attr(test, assert_instr(vunpckhpd))] //should be vpunpckhqdq
25990pub fn _mm512_unpackhi_epi64(a: __m512i, b: __m512i) -> __m512i {
25991 unsafe { simd_shuffle!(a, b, [1, 9, 1 + 2, 9 + 2, 1 + 4, 9 + 4, 1 + 6, 9 + 6]) }
25992}
25993
25994/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25995///
25996/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_epi64&expand=6028)
25997#[inline]
25998#[target_feature(enable = "avx512f")]
25999#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26000#[cfg_attr(test, assert_instr(vpunpckhqdq))]
26001pub fn _mm512_mask_unpackhi_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
26002 unsafe {
26003 let unpackhi: i64x8 = _mm512_unpackhi_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, unpackhi, src.as_i64x8()))
26005 }
26006}
26007
26008/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26009///
26010/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_epi64&expand=6029)
26011#[inline]
26012#[target_feature(enable = "avx512f")]
26013#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26014#[cfg_attr(test, assert_instr(vpunpckhqdq))]
26015pub fn _mm512_maskz_unpackhi_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
26016 unsafe {
26017 let unpackhi: i64x8 = _mm512_unpackhi_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, unpackhi, i64x8::ZERO))
26019 }
26020}
26021
26022/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26023///
26024/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_epi64&expand=6025)
26025#[inline]
26026#[target_feature(enable = "avx512f,avx512vl")]
26027#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26028#[cfg_attr(test, assert_instr(vpunpckhqdq))]
26029pub fn _mm256_mask_unpackhi_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
26030 unsafe {
26031 let unpackhi: i64x4 = _mm256_unpackhi_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, unpackhi, src.as_i64x4()))
26033 }
26034}
26035
26036/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26037///
26038/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_epi64&expand=6026)
26039#[inline]
26040#[target_feature(enable = "avx512f,avx512vl")]
26041#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26042#[cfg_attr(test, assert_instr(vpunpckhqdq))]
26043pub fn _mm256_maskz_unpackhi_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
26044 unsafe {
26045 let unpackhi: i64x4 = _mm256_unpackhi_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, unpackhi, i64x4::ZERO))
26047 }
26048}
26049
26050/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26051///
26052/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_epi64&expand=6022)
26053#[inline]
26054#[target_feature(enable = "avx512f,avx512vl")]
26055#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26056#[cfg_attr(test, assert_instr(vpunpckhqdq))]
26057pub fn _mm_mask_unpackhi_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
26058 unsafe {
26059 let unpackhi: i64x2 = _mm_unpackhi_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, unpackhi, src.as_i64x2()))
26061 }
26062}
26063
26064/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26065///
26066/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_epi64&expand=6023)
26067#[inline]
26068#[target_feature(enable = "avx512f,avx512vl")]
26069#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26070#[cfg_attr(test, assert_instr(vpunpckhqdq))]
26071pub fn _mm_maskz_unpackhi_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
26072 unsafe {
26073 let unpackhi: i64x2 = _mm_unpackhi_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, unpackhi, i64x2::ZERO))
26075 }
26076}
26077
26078/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst.
26079///
26080/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_ps&expand=6060)
26081#[inline]
26082#[target_feature(enable = "avx512f")]
26083#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26084#[cfg_attr(test, assert_instr(vunpckhps))]
26085pub fn _mm512_unpackhi_ps(a: __m512, b: __m512) -> __m512 {
26086 unsafe {
26087 #[rustfmt::skip]
26088 simd_shuffle!(
26089 a, b,
26090 [ 2, 18, 3, 19,
26091 2 + 4, 18 + 4, 3 + 4, 19 + 4,
26092 2 + 8, 18 + 8, 3 + 8, 19 + 8,
26093 2 + 12, 18 + 12, 3 + 12, 19 + 12],
26094 )
26095 }
26096}
26097
26098/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26099///
26100/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_ps&expand=6058)
26101#[inline]
26102#[target_feature(enable = "avx512f")]
26103#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26104#[cfg_attr(test, assert_instr(vunpckhps))]
26105pub fn _mm512_mask_unpackhi_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
26106 unsafe {
26107 let unpackhi: f32x16 = _mm512_unpackhi_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, unpackhi, src.as_f32x16()))
26109 }
26110}
26111
26112/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26113///
26114/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_ps&expand=6059)
26115#[inline]
26116#[target_feature(enable = "avx512f")]
26117#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26118#[cfg_attr(test, assert_instr(vunpckhps))]
26119pub fn _mm512_maskz_unpackhi_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
26120 unsafe {
26121 let unpackhi: f32x16 = _mm512_unpackhi_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, unpackhi, f32x16::ZERO))
26123 }
26124}
26125
26126/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26127///
26128/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_ps&expand=6055)
26129#[inline]
26130#[target_feature(enable = "avx512f,avx512vl")]
26131#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26132#[cfg_attr(test, assert_instr(vunpckhps))]
26133pub fn _mm256_mask_unpackhi_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
26134 unsafe {
26135 let unpackhi: f32x8 = _mm256_unpackhi_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, unpackhi, src.as_f32x8()))
26137 }
26138}
26139
26140/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26141///
26142/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_ps&expand=6056)
26143#[inline]
26144#[target_feature(enable = "avx512f,avx512vl")]
26145#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26146#[cfg_attr(test, assert_instr(vunpckhps))]
26147pub fn _mm256_maskz_unpackhi_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
26148 unsafe {
26149 let unpackhi: f32x8 = _mm256_unpackhi_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, unpackhi, f32x8::ZERO))
26151 }
26152}
26153
26154/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26155///
26156/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_ps&expand=6052)
26157#[inline]
26158#[target_feature(enable = "avx512f,avx512vl")]
26159#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26160#[cfg_attr(test, assert_instr(vunpckhps))]
26161pub fn _mm_mask_unpackhi_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
26162 unsafe {
26163 let unpackhi: f32x4 = _mm_unpackhi_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, unpackhi, src.as_f32x4()))
26165 }
26166}
26167
26168/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26169///
26170/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_ps&expand=6053)
26171#[inline]
26172#[target_feature(enable = "avx512f,avx512vl")]
26173#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26174#[cfg_attr(test, assert_instr(vunpckhps))]
26175pub fn _mm_maskz_unpackhi_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
26176 unsafe {
26177 let unpackhi: f32x4 = _mm_unpackhi_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, unpackhi, f32x4::ZERO))
26179 }
26180}
26181
26182/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst.
26183///
26184/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_pd&expand=6048)
26185#[inline]
26186#[target_feature(enable = "avx512f")]
26187#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26188#[cfg_attr(test, assert_instr(vunpckhpd))]
26189pub fn _mm512_unpackhi_pd(a: __m512d, b: __m512d) -> __m512d {
26190 unsafe { simd_shuffle!(a, b, [1, 9, 1 + 2, 9 + 2, 1 + 4, 9 + 4, 1 + 6, 9 + 6]) }
26191}
26192
26193/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26194///
26195/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_pd&expand=6046)
26196#[inline]
26197#[target_feature(enable = "avx512f")]
26198#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26199#[cfg_attr(test, assert_instr(vunpckhpd))]
26200pub fn _mm512_mask_unpackhi_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
26201 unsafe {
26202 let unpackhi: f64x8 = _mm512_unpackhi_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, unpackhi, src.as_f64x8()))
26204 }
26205}
26206
26207/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26208///
26209/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_pd&expand=6047)
26210#[inline]
26211#[target_feature(enable = "avx512f")]
26212#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26213#[cfg_attr(test, assert_instr(vunpckhpd))]
26214pub fn _mm512_maskz_unpackhi_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
26215 unsafe {
26216 let unpackhi: f64x8 = _mm512_unpackhi_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, unpackhi, f64x8::ZERO))
26218 }
26219}
26220
26221/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26222///
26223/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_pd&expand=6043)
26224#[inline]
26225#[target_feature(enable = "avx512f,avx512vl")]
26226#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26227#[cfg_attr(test, assert_instr(vunpckhpd))]
26228pub fn _mm256_mask_unpackhi_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
26229 unsafe {
26230 let unpackhi: f64x4 = _mm256_unpackhi_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, unpackhi, src.as_f64x4()))
26232 }
26233}
26234
26235/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26236///
26237/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_pd&expand=6044)
26238#[inline]
26239#[target_feature(enable = "avx512f,avx512vl")]
26240#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26241#[cfg_attr(test, assert_instr(vunpckhpd))]
26242pub fn _mm256_maskz_unpackhi_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
26243 unsafe {
26244 let unpackhi: f64x4 = _mm256_unpackhi_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, unpackhi, f64x4::ZERO))
26246 }
26247}
26248
26249/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26250///
26251/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_pd&expand=6040)
26252#[inline]
26253#[target_feature(enable = "avx512f,avx512vl")]
26254#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26255#[cfg_attr(test, assert_instr(vunpckhpd))]
26256pub fn _mm_mask_unpackhi_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
26257 unsafe {
26258 let unpackhi: f64x2 = _mm_unpackhi_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, unpackhi, src.as_f64x2()))
26260 }
26261}
26262
26263/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26264///
26265/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_pd&expand=6041)
26266#[inline]
26267#[target_feature(enable = "avx512f,avx512vl")]
26268#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26269#[cfg_attr(test, assert_instr(vunpckhpd))]
26270pub fn _mm_maskz_unpackhi_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
26271 unsafe {
26272 let unpackhi: f64x2 = _mm_unpackhi_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, unpackhi, f64x2::ZERO))
26274 }
26275}
26276
26277/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst.
26278///
26279/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_epi32&expand=6078)
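///
/// A minimal sketch, mirroring the `unpackhi` example but for the low half of
/// each 128-bit lane (illustrative only, not run as a doctest):
///
/// ```ignore
/// let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
/// let b = _mm512_setr_epi32(16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
/// // [0, 16, 1, 17, 4, 20, 5, 21, 8, 24, 9, 25, 12, 28, 13, 29]
/// let r = _mm512_unpacklo_epi32(a, b);
/// ```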
26280#[inline]
26281#[target_feature(enable = "avx512f")]
26282#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26283#[cfg_attr(test, assert_instr(vunpcklps))] //should be vpunpckldq
26284pub fn _mm512_unpacklo_epi32(a: __m512i, b: __m512i) -> __m512i {
26285 unsafe {
26286 let a: i32x16 = a.as_i32x16();
26287 let b: i32x16 = b.as_i32x16();
26288 #[rustfmt::skip]
26289 let r: i32x16 = simd_shuffle!(
26290 a, b,
26291 [ 0, 16, 1, 17,
26292 0 + 4, 16 + 4, 1 + 4, 17 + 4,
26293 0 + 8, 16 + 8, 1 + 8, 17 + 8,
26294 0 + 12, 16 + 12, 1 + 12, 17 + 12],
26295 );
        transmute(r)
26297 }
26298}
26299
26300/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26301///
26302/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_epi32&expand=6076)
26303#[inline]
26304#[target_feature(enable = "avx512f")]
26305#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26306#[cfg_attr(test, assert_instr(vpunpckldq))]
26307pub fn _mm512_mask_unpacklo_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
26308 unsafe {
26309 let unpacklo: i32x16 = _mm512_unpacklo_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, unpacklo, src.as_i32x16()))
26311 }
26312}
26313
26314/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26315///
26316/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_epi32&expand=6077)
26317#[inline]
26318#[target_feature(enable = "avx512f")]
26319#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26320#[cfg_attr(test, assert_instr(vpunpckldq))]
26321pub fn _mm512_maskz_unpacklo_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
26322 unsafe {
26323 let unpacklo: i32x16 = _mm512_unpacklo_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, unpacklo, i32x16::ZERO))
26325 }
26326}
26327
26328/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26329///
26330/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_epi32&expand=6073)
26331#[inline]
26332#[target_feature(enable = "avx512f,avx512vl")]
26333#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26334#[cfg_attr(test, assert_instr(vpunpckldq))]
26335pub fn _mm256_mask_unpacklo_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
26336 unsafe {
26337 let unpacklo: i32x8 = _mm256_unpacklo_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, unpacklo, src.as_i32x8()))
26339 }
26340}
26341
26342/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26343///
26344/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_epi32&expand=6074)
26345#[inline]
26346#[target_feature(enable = "avx512f,avx512vl")]
26347#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26348#[cfg_attr(test, assert_instr(vpunpckldq))]
26349pub fn _mm256_maskz_unpacklo_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
26350 unsafe {
26351 let unpacklo: i32x8 = _mm256_unpacklo_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, unpacklo, i32x8::ZERO))
26353 }
26354}
26355
26356/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26357///
26358/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_epi32&expand=6070)
26359#[inline]
26360#[target_feature(enable = "avx512f,avx512vl")]
26361#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26362#[cfg_attr(test, assert_instr(vpunpckldq))]
26363pub fn _mm_mask_unpacklo_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
26364 unsafe {
26365 let unpacklo: i32x4 = _mm_unpacklo_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, unpacklo, src.as_i32x4()))
26367 }
26368}
26369
26370/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26371///
26372/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_epi32&expand=6071)
26373#[inline]
26374#[target_feature(enable = "avx512f,avx512vl")]
26375#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26376#[cfg_attr(test, assert_instr(vpunpckldq))]
26377pub fn _mm_maskz_unpacklo_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
26378 unsafe {
26379 let unpacklo: i32x4 = _mm_unpacklo_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, unpacklo, i32x4::ZERO))
26381 }
26382}
26383
26384/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst.
26385///
26386/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_epi64&expand=6087)
26387#[inline]
26388#[target_feature(enable = "avx512f")]
26389#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26390#[cfg_attr(test, assert_instr(vunpcklpd))] //should be vpunpcklqdq
26391pub fn _mm512_unpacklo_epi64(a: __m512i, b: __m512i) -> __m512i {
26392 unsafe { simd_shuffle!(a, b, [0, 8, 0 + 2, 8 + 2, 0 + 4, 8 + 4, 0 + 6, 8 + 6]) }
26393}
26394
26395/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26396///
26397/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_epi64&expand=6085)
26398#[inline]
26399#[target_feature(enable = "avx512f")]
26400#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26401#[cfg_attr(test, assert_instr(vpunpcklqdq))]
26402pub fn _mm512_mask_unpacklo_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
26403 unsafe {
26404 let unpacklo: i64x8 = _mm512_unpacklo_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, unpacklo, src.as_i64x8()))
26406 }
26407}
26408
26409/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26410///
26411/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_epi64&expand=6086)
26412#[inline]
26413#[target_feature(enable = "avx512f")]
26414#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26415#[cfg_attr(test, assert_instr(vpunpcklqdq))]
26416pub fn _mm512_maskz_unpacklo_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
26417 unsafe {
26418 let unpacklo: i64x8 = _mm512_unpacklo_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, unpacklo, i64x8::ZERO))
26420 }
26421}
26422
26423/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26424///
26425/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_epi64&expand=6082)
26426#[inline]
26427#[target_feature(enable = "avx512f,avx512vl")]
26428#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26429#[cfg_attr(test, assert_instr(vpunpcklqdq))]
26430pub fn _mm256_mask_unpacklo_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
26431 unsafe {
26432 let unpacklo: i64x4 = _mm256_unpacklo_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, unpacklo, src.as_i64x4()))
26434 }
26435}
26436
26437/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26438///
26439/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_epi64&expand=6083)
26440#[inline]
26441#[target_feature(enable = "avx512f,avx512vl")]
26442#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26443#[cfg_attr(test, assert_instr(vpunpcklqdq))]
26444pub fn _mm256_maskz_unpacklo_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
26445 unsafe {
26446 let unpacklo: i64x4 = _mm256_unpacklo_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, unpacklo, i64x4::ZERO))
26448 }
26449}
26450
26451/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26452///
26453/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_epi64&expand=6079)
26454#[inline]
26455#[target_feature(enable = "avx512f,avx512vl")]
26456#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26457#[cfg_attr(test, assert_instr(vpunpcklqdq))]
26458pub fn _mm_mask_unpacklo_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
26459 unsafe {
26460 let unpacklo: i64x2 = _mm_unpacklo_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, unpacklo, src.as_i64x2()))
26462 }
26463}
26464
26465/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26466///
26467/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_epi64&expand=6080)
26468#[inline]
26469#[target_feature(enable = "avx512f,avx512vl")]
26470#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26471#[cfg_attr(test, assert_instr(vpunpcklqdq))]
26472pub fn _mm_maskz_unpacklo_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
26473 unsafe {
26474 let unpacklo: i64x2 = _mm_unpacklo_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, unpacklo, i64x2::ZERO))
26476 }
26477}
26478
26479/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst.
26480///
26481/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_ps&expand=6117)
26482#[inline]
26483#[target_feature(enable = "avx512f")]
26484#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26485#[cfg_attr(test, assert_instr(vunpcklps))]
26486pub fn _mm512_unpacklo_ps(a: __m512, b: __m512) -> __m512 {
26487 unsafe {
26488 #[rustfmt::skip]
26489 simd_shuffle!(a, b,
26490 [ 0, 16, 1, 17,
26491 0 + 4, 16 + 4, 1 + 4, 17 + 4,
26492 0 + 8, 16 + 8, 1 + 8, 17 + 8,
26493 0 + 12, 16 + 12, 1 + 12, 17 + 12],
26494 )
26495 }
26496}
26497
26498/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26499///
26500/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_ps&expand=6115)
26501#[inline]
26502#[target_feature(enable = "avx512f")]
26503#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26504#[cfg_attr(test, assert_instr(vunpcklps))]
26505pub fn _mm512_mask_unpacklo_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
26506 unsafe {
26507 let unpacklo: f32x16 = _mm512_unpacklo_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, unpacklo, src.as_f32x16()))
26509 }
26510}
26511
26512/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26513///
26514/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_ps&expand=6116)
26515#[inline]
26516#[target_feature(enable = "avx512f")]
26517#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26518#[cfg_attr(test, assert_instr(vunpcklps))]
26519pub fn _mm512_maskz_unpacklo_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
26520 unsafe {
26521 let unpacklo: f32x16 = _mm512_unpacklo_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, unpacklo, f32x16::ZERO))
26523 }
26524}
26525
26526/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26527///
26528/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_ps&expand=6112)
26529#[inline]
26530#[target_feature(enable = "avx512f,avx512vl")]
26531#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26532#[cfg_attr(test, assert_instr(vunpcklps))]
26533pub fn _mm256_mask_unpacklo_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
26534 unsafe {
26535 let unpacklo: f32x8 = _mm256_unpacklo_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, unpacklo, src.as_f32x8()))
26537 }
26538}
26539
26540/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26541///
26542/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_ps&expand=6113)
26543#[inline]
26544#[target_feature(enable = "avx512f,avx512vl")]
26545#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26546#[cfg_attr(test, assert_instr(vunpcklps))]
26547pub fn _mm256_maskz_unpacklo_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
26548 unsafe {
26549 let unpacklo: f32x8 = _mm256_unpacklo_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, unpacklo, f32x8::ZERO))
26551 }
26552}
26553
26554/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26555///
26556/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_ps&expand=6109)
26557#[inline]
26558#[target_feature(enable = "avx512f,avx512vl")]
26559#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26560#[cfg_attr(test, assert_instr(vunpcklps))]
26561pub fn _mm_mask_unpacklo_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
26562 unsafe {
26563 let unpacklo: f32x4 = _mm_unpacklo_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, unpacklo, src.as_f32x4()))
26565 }
26566}
26567
26568/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26569///
26570/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_ps&expand=6110)
26571#[inline]
26572#[target_feature(enable = "avx512f,avx512vl")]
26573#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26574#[cfg_attr(test, assert_instr(vunpcklps))]
26575pub fn _mm_maskz_unpacklo_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
26576 unsafe {
26577 let unpacklo: f32x4 = _mm_unpacklo_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, unpacklo, f32x4::ZERO))
26579 }
26580}
26581
26582/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst.
26583///
26584/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_pd&expand=6105)
26585#[inline]
26586#[target_feature(enable = "avx512f")]
26587#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26588#[cfg_attr(test, assert_instr(vunpcklpd))]
26589pub fn _mm512_unpacklo_pd(a: __m512d, b: __m512d) -> __m512d {
26590 unsafe { simd_shuffle!(a, b, [0, 8, 0 + 2, 8 + 2, 0 + 4, 8 + 4, 0 + 6, 8 + 6]) }
26591}
26592
26593/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26594///
26595/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_pd&expand=6103)
26596#[inline]
26597#[target_feature(enable = "avx512f")]
26598#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26599#[cfg_attr(test, assert_instr(vunpcklpd))]
26600pub fn _mm512_mask_unpacklo_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
26601 unsafe {
26602 let unpacklo: f64x8 = _mm512_unpacklo_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, unpacklo, src.as_f64x8()))
26604 }
26605}
26606
26607/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26608///
26609/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_pd&expand=6104)
26610#[inline]
26611#[target_feature(enable = "avx512f")]
26612#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26613#[cfg_attr(test, assert_instr(vunpcklpd))]
26614pub fn _mm512_maskz_unpacklo_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
26615 unsafe {
26616 let unpacklo: f64x8 = _mm512_unpacklo_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, unpacklo, f64x8::ZERO))
26618 }
26619}
26620
26621/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26622///
26623/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_pd&expand=6100)
26624#[inline]
26625#[target_feature(enable = "avx512f,avx512vl")]
26626#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26627#[cfg_attr(test, assert_instr(vunpcklpd))]
26628pub fn _mm256_mask_unpacklo_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
26629 unsafe {
26630 let unpacklo: f64x4 = _mm256_unpacklo_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, unpacklo, src.as_f64x4()))
26632 }
26633}
26634
26635/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26636///
26637/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_pd&expand=6101)
26638#[inline]
26639#[target_feature(enable = "avx512f,avx512vl")]
26640#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26641#[cfg_attr(test, assert_instr(vunpcklpd))]
26642pub fn _mm256_maskz_unpacklo_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
26643 unsafe {
26644 let unpacklo: f64x4 = _mm256_unpacklo_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, unpacklo, f64x4::ZERO))
26646 }
26647}
26648
26649/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26650///
26651/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_pd&expand=6097)
26652#[inline]
26653#[target_feature(enable = "avx512f,avx512vl")]
26654#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26655#[cfg_attr(test, assert_instr(vunpcklpd))]
26656pub fn _mm_mask_unpacklo_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
26657 unsafe {
26658 let unpacklo: f64x2 = _mm_unpacklo_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, unpacklo, src.as_f64x2()))
26660 }
26661}
26662
26663/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26664///
26665/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_pd&expand=6098)
26666#[inline]
26667#[target_feature(enable = "avx512f,avx512vl")]
26668#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26669#[cfg_attr(test, assert_instr(vunpcklpd))]
26670pub fn _mm_maskz_unpacklo_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
26671 unsafe {
26672 let unpacklo: f64x2 = _mm_unpacklo_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, unpacklo, f64x2::ZERO))
26674 }
26675}
26676
26677/// Cast vector of type __m128 to type __m512; the upper 384 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26678///
26679/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps128_ps512&expand=621)
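///
/// A hedged sketch (illustrative only, not run as a doctest). Only the low four
/// lanes of the result are meaningful; the upper lanes must not be inspected:
///
/// ```ignore
/// let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
/// let wide = _mm512_castps128_ps512(a);
/// // Recovering the low 128 bits gives back [1.0, 2.0, 3.0, 4.0]; the other
/// // twelve lanes of `wide` hold unspecified values.
/// let low = _mm512_castps512_ps128(wide);
/// ```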
26680#[inline]
26681#[target_feature(enable = "avx512f")]
26682#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26683pub fn _mm512_castps128_ps512(a: __m128) -> __m512 {
26684 unsafe {
26685 simd_shuffle!(
26686 a,
26687 _mm_undefined_ps(),
26688 [0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4],
26689 )
26690 }
26691}
26692
26693/// Cast vector of type __m256 to type __m512; the upper 256 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26694///
26695/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps256_ps512&expand=623)
26696#[inline]
26697#[target_feature(enable = "avx512f")]
26698#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26699pub fn _mm512_castps256_ps512(a: __m256) -> __m512 {
26700 unsafe {
26701 simd_shuffle!(
26702 a,
26703 _mm256_undefined_ps(),
26704 [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
26705 )
26706 }
26707}
26708
26709/// Cast vector of type __m128 to type __m512; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26710///
26711/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextps128_ps512&expand=6196)
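///
/// A minimal, illustrative sketch contrasting this intrinsic with
/// `_mm512_castps128_ps512`, whose upper lanes are left undefined (the `demo`
/// helper and its values are made up, not part of Intel's documentation). It
/// assumes a nightly toolchain with `#![feature(stdarch_x86_avx512)]` and is
/// marked `ignore`, so it is not compiled as a doctest:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() -> [f32; 16] {
///     let lo = _mm_set_ps(4.0, 3.0, 2.0, 1.0); // lanes [1.0, 2.0, 3.0, 4.0]
///     let wide = _mm512_zextps128_ps512(lo);
///     // Lanes 4..16 are guaranteed to be zero.
///     unsafe { core::mem::transmute(wide) } // => [1.0, 2.0, 3.0, 4.0, 0.0, ...]
/// }
/// ```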
26712#[inline]
26713#[target_feature(enable = "avx512f")]
26714#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26715pub fn _mm512_zextps128_ps512(a: __m128) -> __m512 {
26716 unsafe {
26717 simd_shuffle!(
26718 a,
26719 _mm_set1_ps(0.),
26720 [0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4],
26721 )
26722 }
26723}
26724
26725/// Cast vector of type __m256 to type __m512; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26726///
26727/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextps256_ps512&expand=6197)
26728#[inline]
26729#[target_feature(enable = "avx512f")]
26730#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26731pub fn _mm512_zextps256_ps512(a: __m256) -> __m512 {
26732 unsafe {
26733 simd_shuffle!(
26734 a,
26735 _mm256_set1_ps(0.),
26736 [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
26737 )
26738 }
26739}
26740
26741/// Cast vector of type __m512 to type __m128. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26742///
26743/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps512_ps128&expand=624)
26744#[inline]
26745#[target_feature(enable = "avx512f")]
26746#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26747pub fn _mm512_castps512_ps128(a: __m512) -> __m128 {
26748 unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) }
26749}
26750
26751/// Cast vector of type __m512 to type __m256. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26752///
26753/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps512_ps256&expand=625)
26754#[inline]
26755#[target_feature(enable = "avx512f")]
26756#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26757pub fn _mm512_castps512_ps256(a: __m512) -> __m256 {
26758 unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) }
26759}
26760
26761/// Cast vector of type __m512 to type __m512d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26762///
26763/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps_pd&expand=616)
26764#[inline]
26765#[target_feature(enable = "avx512f")]
26766#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26767pub fn _mm512_castps_pd(a: __m512) -> __m512d {
    unsafe { transmute(a) }
26769}
26770
26771/// Cast vector of type __m512 to type __m512i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26772///
26773/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps_si512&expand=619)
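///
/// A minimal, illustrative sketch showing that the cast only reinterprets bits
/// and performs no numeric conversion (the `demo` helper and its values are
/// made up, not part of Intel's documentation). It assumes a nightly toolchain
/// with `#![feature(stdarch_x86_avx512)]` and is marked `ignore`, so it is not
/// compiled as a doctest:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() -> i32 {
///     let ones = _mm512_set1_ps(1.0);
///     // Each lane now holds the IEEE-754 bit pattern of 1.0f32.
///     let bits = _mm512_castps_si512(ones);
///     _mm512_cvtsi512_si32(bits) // => 0x3F80_0000
/// }
/// ```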
26774#[inline]
26775#[target_feature(enable = "avx512f")]
26776#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26777pub fn _mm512_castps_si512(a: __m512) -> __m512i {
    unsafe { transmute(a) }
26779}
26780
26781/// Cast vector of type __m128d to type __m512d; the upper 384 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26782///
26783/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd128_pd512&expand=609)
26784#[inline]
26785#[target_feature(enable = "avx512f")]
26786#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26787pub fn _mm512_castpd128_pd512(a: __m128d) -> __m512d {
26788 unsafe { simd_shuffle!(a, _mm_undefined_pd(), [0, 1, 2, 2, 2, 2, 2, 2]) }
26789}
26790
26791/// Cast vector of type __m256d to type __m512d; the upper 256 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26792///
26793/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd256_pd512&expand=611)
26794#[inline]
26795#[target_feature(enable = "avx512f")]
26796#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26797pub fn _mm512_castpd256_pd512(a: __m256d) -> __m512d {
26798 unsafe { simd_shuffle!(a, _mm256_undefined_pd(), [0, 1, 2, 3, 4, 4, 4, 4]) }
26799}
26800
26801/// Cast vector of type __m128d to type __m512d; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26802///
26803/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextpd128_pd512&expand=6193)
26804#[inline]
26805#[target_feature(enable = "avx512f")]
26806#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26807pub fn _mm512_zextpd128_pd512(a: __m128d) -> __m512d {
26808 unsafe { simd_shuffle!(a, _mm_set1_pd(0.), [0, 1, 2, 2, 2, 2, 2, 2]) }
26809}
26810
26811/// Cast vector of type __m256d to type __m512d; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26812///
26813/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextpd256_pd512&expand=6194)
26814#[inline]
26815#[target_feature(enable = "avx512f")]
26816#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26817pub fn _mm512_zextpd256_pd512(a: __m256d) -> __m512d {
26818 unsafe { simd_shuffle!(a, _mm256_set1_pd(0.), [0, 1, 2, 3, 4, 4, 4, 4]) }
26819}
26820
26821/// Cast vector of type __m512d to type __m128d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26822///
26823/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd512_pd128&expand=612)
26824#[inline]
26825#[target_feature(enable = "avx512f")]
26826#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26827pub fn _mm512_castpd512_pd128(a: __m512d) -> __m128d {
26828 unsafe { simd_shuffle!(a, a, [0, 1]) }
26829}
26830
26831/// Cast vector of type __m512d to type __m256d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26832///
26833/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd512_pd256&expand=613)
26834#[inline]
26835#[target_feature(enable = "avx512f")]
26836#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26837pub fn _mm512_castpd512_pd256(a: __m512d) -> __m256d {
26838 unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) }
26839}
26840
26841/// Cast vector of type __m512d to type __m512. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26842///
26843/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd_ps&expand=604)
26844#[inline]
26845#[target_feature(enable = "avx512f")]
26846#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26847pub fn _mm512_castpd_ps(a: __m512d) -> __m512 {
    unsafe { transmute(a) }
26849}
26850
26851/// Cast vector of type __m512d to type __m512i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26852///
26853/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd_si512&expand=607)
26854#[inline]
26855#[target_feature(enable = "avx512f")]
26856#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26857pub fn _mm512_castpd_si512(a: __m512d) -> __m512i {
    unsafe { transmute(a) }
26859}
26860
26861/// Cast vector of type __m128i to type __m512i; the upper 384 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26862///
26863/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi128_si512&expand=629)
26864#[inline]
26865#[target_feature(enable = "avx512f")]
26866#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26867pub fn _mm512_castsi128_si512(a: __m128i) -> __m512i {
26868 unsafe { simd_shuffle!(a, _mm_undefined_si128(), [0, 1, 2, 2, 2, 2, 2, 2]) }
26869}
26870
26871/// Cast vector of type __m256i to type __m512i; the upper 256 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26872///
26873/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi256_si512&expand=633)
26874#[inline]
26875#[target_feature(enable = "avx512f")]
26876#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26877pub fn _mm512_castsi256_si512(a: __m256i) -> __m512i {
26878 unsafe { simd_shuffle!(a, _mm256_undefined_si256(), [0, 1, 2, 3, 4, 4, 4, 4]) }
26879}
26880
26881/// Cast vector of type __m128i to type __m512i; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26882///
26883/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextsi128_si512&expand=6199)
26884#[inline]
26885#[target_feature(enable = "avx512f")]
26886#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26887pub fn _mm512_zextsi128_si512(a: __m128i) -> __m512i {
26888 unsafe { simd_shuffle!(a, _mm_setzero_si128(), [0, 1, 2, 2, 2, 2, 2, 2]) }
26889}
26890
26891/// Cast vector of type __m256i to type __m512i; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26892///
26893/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextsi256_si512&expand=6200)
26894#[inline]
26895#[target_feature(enable = "avx512f")]
26896#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26897pub fn _mm512_zextsi256_si512(a: __m256i) -> __m512i {
26898 unsafe { simd_shuffle!(a, _mm256_setzero_si256(), [0, 1, 2, 3, 4, 4, 4, 4]) }
26899}
26900
26901/// Cast vector of type __m512i to type __m128i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26902///
26903/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi512_si128&expand=636)
26904#[inline]
26905#[target_feature(enable = "avx512f")]
26906#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26907pub fn _mm512_castsi512_si128(a: __m512i) -> __m128i {
26908 unsafe { simd_shuffle!(a, a, [0, 1]) }
26909}
26910
26911/// Cast vector of type __m512i to type __m256i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26912///
26913/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi512_si256&expand=637)
26914#[inline]
26915#[target_feature(enable = "avx512f")]
26916#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26917pub fn _mm512_castsi512_si256(a: __m512i) -> __m256i {
26918 unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) }
26919}
26920
26921/// Cast vector of type __m512i to type __m512. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26922///
26923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi512_ps&expand=635)
26924#[inline]
26925#[target_feature(enable = "avx512f")]
26926#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26927pub fn _mm512_castsi512_ps(a: __m512i) -> __m512 {
    unsafe { transmute(a) }
26929}
26930
26931/// Cast vector of type __m512i to type __m512d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26932///
26933/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi512_pd&expand=634)
26934#[inline]
26935#[target_feature(enable = "avx512f")]
26936#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26937pub fn _mm512_castsi512_pd(a: __m512i) -> __m512d {
    unsafe { transmute(a) }
26939}
26940
26941/// Copy the lower 32-bit integer in a to dst.
26942///
26943/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsi512_si32&expand=1882)
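///
/// A minimal, illustrative sketch (the `demo` helper and its values are made
/// up, not part of Intel's documentation). It assumes a nightly toolchain with
/// `#![feature(stdarch_x86_avx512)]` and is marked `ignore`, so it is not
/// compiled as a doctest:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() -> i32 {
///     let v = _mm512_setr_epi32(7, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16);
///     // Only the lowest 32-bit lane is returned.
///     _mm512_cvtsi512_si32(v) // => 7
/// }
/// ```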
26944#[inline]
26945#[target_feature(enable = "avx512f")]
26946#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26947#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(vmovd))]
26948pub fn _mm512_cvtsi512_si32(a: __m512i) -> i32 {
26949 unsafe { simd_extract!(a.as_i32x16(), 0) }
26950}
26951
26952/// Copy the lower single-precision (32-bit) floating-point element of a to dst.
26953///
26954/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtss_f32)
26955#[inline]
26956#[target_feature(enable = "avx512f")]
26957#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26958pub fn _mm512_cvtss_f32(a: __m512) -> f32 {
26959 unsafe { simd_extract!(a, 0) }
26960}
26961
26962/// Copy the lower double-precision (64-bit) floating-point element of a to dst.
26963///
26964/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsd_f64)
26965#[inline]
26966#[target_feature(enable = "avx512f")]
26967#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26968pub fn _mm512_cvtsd_f64(a: __m512d) -> f64 {
26969 unsafe { simd_extract!(a, 0) }
26970}
26971
26972/// Broadcast the low packed 32-bit integer from a to all elements of dst.
26973///
26974/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastd_epi32&expand=545)
26975#[inline]
26976#[target_feature(enable = "avx512f")]
26977#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26978#[cfg_attr(test, assert_instr(vbroadcast))] //should be vpbroadcastd
26979pub fn _mm512_broadcastd_epi32(a: __m128i) -> __m512i {
26980 unsafe {
26981 let a: i32x16 = _mm512_castsi128_si512(a).as_i32x16();
26982 let ret: i32x16 = simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
        transmute(ret)
26984 }
26985}
26986
26987/// Broadcast the low packed 32-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26988///
26989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastd_epi32&expand=546)
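///
/// A minimal, illustrative sketch of the writemask behaviour (the `demo`
/// helper and its values are made up, not part of Intel's documentation). It
/// assumes a nightly toolchain with `#![feature(stdarch_x86_avx512)]` and is
/// marked `ignore`, so it is not compiled as a doctest:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() -> __m512i {
///     let src = _mm512_set1_epi32(-1);
///     let a = _mm_set1_epi32(7);
///     // Even-numbered lanes receive the broadcast value 7; odd-numbered
///     // lanes keep the corresponding lane of `src` (-1).
///     _mm512_mask_broadcastd_epi32(src, 0b0101_0101_0101_0101, a)
/// }
/// ```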
26990#[inline]
26991#[target_feature(enable = "avx512f")]
26992#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26993#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
26994pub fn _mm512_mask_broadcastd_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
26995 unsafe {
26996 let broadcast: i32x16 = _mm512_broadcastd_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, broadcast, src.as_i32x16()))
26998 }
26999}
27000
27001/// Broadcast the low packed 32-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27002///
27003/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastd_epi32&expand=547)
27004#[inline]
27005#[target_feature(enable = "avx512f")]
27006#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27007#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
27008pub fn _mm512_maskz_broadcastd_epi32(k: __mmask16, a: __m128i) -> __m512i {
27009 unsafe {
27010 let broadcast: i32x16 = _mm512_broadcastd_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, broadcast, i32x16::ZERO))
27012 }
27013}
27014
27015/// Broadcast the low packed 32-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27016///
27017/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastd_epi32&expand=543)
27018#[inline]
27019#[target_feature(enable = "avx512f,avx512vl")]
27020#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27021#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
27022pub fn _mm256_mask_broadcastd_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
27023 unsafe {
27024 let broadcast: i32x8 = _mm256_broadcastd_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, broadcast, src.as_i32x8()))
27026 }
27027}
27028
27029/// Broadcast the low packed 32-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27030///
27031/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastd_epi32&expand=544)
27032#[inline]
27033#[target_feature(enable = "avx512f,avx512vl")]
27034#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27035#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
27036pub fn _mm256_maskz_broadcastd_epi32(k: __mmask8, a: __m128i) -> __m256i {
27037 unsafe {
27038 let broadcast: i32x8 = _mm256_broadcastd_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, broadcast, i32x8::ZERO))
27040 }
27041}
27042
27043/// Broadcast the low packed 32-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27044///
27045/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastd_epi32&expand=540)
27046#[inline]
27047#[target_feature(enable = "avx512f,avx512vl")]
27048#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27049#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
27050pub fn _mm_mask_broadcastd_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
27051 unsafe {
27052 let broadcast: i32x4 = _mm_broadcastd_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, broadcast, src.as_i32x4()))
27054 }
27055}
27056
27057/// Broadcast the low packed 32-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27058///
27059/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastd_epi32&expand=541)
27060#[inline]
27061#[target_feature(enable = "avx512f,avx512vl")]
27062#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27063#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
27064pub fn _mm_maskz_broadcastd_epi32(k: __mmask8, a: __m128i) -> __m128i {
27065 unsafe {
27066 let broadcast: i32x4 = _mm_broadcastd_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, broadcast, i32x4::ZERO))
27068 }
27069}
27070
27071/// Broadcast the low packed 64-bit integer from a to all elements of dst.
27072///
27073/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastq_epi64&expand=560)
27074#[inline]
27075#[target_feature(enable = "avx512f")]
27076#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27077#[cfg_attr(test, assert_instr(vbroadcast))] //should be vpbroadcastq
27078pub fn _mm512_broadcastq_epi64(a: __m128i) -> __m512i {
27079 unsafe { simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0]) }
27080}
27081
27082/// Broadcast the low packed 64-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27083///
27084/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastq_epi64&expand=561)
27085#[inline]
27086#[target_feature(enable = "avx512f")]
27087#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27088#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
27089pub fn _mm512_mask_broadcastq_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
27090 unsafe {
27091 let broadcast: i64x8 = _mm512_broadcastq_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, broadcast, src.as_i64x8()))
27093 }
27094}
27095
27096/// Broadcast the low packed 64-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27097///
27098/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastq_epi64&expand=562)
27099#[inline]
27100#[target_feature(enable = "avx512f")]
27101#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27102#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
27103pub fn _mm512_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m512i {
27104 unsafe {
27105 let broadcast: i64x8 = _mm512_broadcastq_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, broadcast, i64x8::ZERO))
27107 }
27108}
27109
27110/// Broadcast the low packed 64-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27111///
27112/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastq_epi64&expand=558)
27113#[inline]
27114#[target_feature(enable = "avx512f,avx512vl")]
27115#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27116#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
27117pub fn _mm256_mask_broadcastq_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
27118 unsafe {
27119 let broadcast: i64x4 = _mm256_broadcastq_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, broadcast, src.as_i64x4()))
27121 }
27122}
27123
27124/// Broadcast the low packed 64-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27125///
27126/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastq_epi64&expand=559)
27127#[inline]
27128#[target_feature(enable = "avx512f,avx512vl")]
27129#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27130#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
27131pub fn _mm256_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m256i {
27132 unsafe {
27133 let broadcast: i64x4 = _mm256_broadcastq_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, broadcast, i64x4::ZERO))
27135 }
27136}
27137
27138/// Broadcast the low packed 64-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27139///
27140/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastq_epi64&expand=555)
27141#[inline]
27142#[target_feature(enable = "avx512f,avx512vl")]
27143#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27144#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
27145pub fn _mm_mask_broadcastq_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
27146 unsafe {
27147 let broadcast: i64x2 = _mm_broadcastq_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, broadcast, src.as_i64x2()))
27149 }
27150}
27151
27152/// Broadcast the low packed 64-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27153///
27154/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastq_epi64&expand=556)
27155#[inline]
27156#[target_feature(enable = "avx512f,avx512vl")]
27157#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27158#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
27159pub fn _mm_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m128i {
27160 unsafe {
27161 let broadcast: i64x2 = _mm_broadcastq_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, broadcast, i64x2::ZERO))
27163 }
27164}
27165
27166/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst.
27167///
27168/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastss_ps&expand=578)
27169#[inline]
27170#[target_feature(enable = "avx512f")]
27171#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27172#[cfg_attr(test, assert_instr(vbroadcastss))]
27173pub fn _mm512_broadcastss_ps(a: __m128) -> __m512 {
27174 unsafe { simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) }
27175}
27176
27177/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27178///
27179/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastss_ps&expand=579)
27180#[inline]
27181#[target_feature(enable = "avx512f")]
27182#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27183#[cfg_attr(test, assert_instr(vbroadcastss))]
27184pub fn _mm512_mask_broadcastss_ps(src: __m512, k: __mmask16, a: __m128) -> __m512 {
27185 unsafe {
27186 let broadcast: f32x16 = _mm512_broadcastss_ps(a).as_f32x16();
        transmute(simd_select_bitmask(k, broadcast, src.as_f32x16()))
27188 }
27189}
27190
27191/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27192///
27193/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastss_ps&expand=580)
27194#[inline]
27195#[target_feature(enable = "avx512f")]
27196#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27197#[cfg_attr(test, assert_instr(vbroadcastss))]
27198pub fn _mm512_maskz_broadcastss_ps(k: __mmask16, a: __m128) -> __m512 {
27199 unsafe {
27200 let broadcast: f32x16 = _mm512_broadcastss_ps(a).as_f32x16();
        transmute(simd_select_bitmask(k, broadcast, f32x16::ZERO))
27202 }
27203}
27204
27205/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27206///
27207/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastss_ps&expand=576)
27208#[inline]
27209#[target_feature(enable = "avx512f,avx512vl")]
27210#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27211#[cfg_attr(test, assert_instr(vbroadcastss))]
27212pub fn _mm256_mask_broadcastss_ps(src: __m256, k: __mmask8, a: __m128) -> __m256 {
27213 unsafe {
27214 let broadcast: f32x8 = _mm256_broadcastss_ps(a).as_f32x8();
        transmute(simd_select_bitmask(k, broadcast, src.as_f32x8()))
27216 }
27217}
27218
27219/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27220///
27221/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastss_ps&expand=577)
27222#[inline]
27223#[target_feature(enable = "avx512f,avx512vl")]
27224#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27225#[cfg_attr(test, assert_instr(vbroadcastss))]
27226pub fn _mm256_maskz_broadcastss_ps(k: __mmask8, a: __m128) -> __m256 {
27227 unsafe {
27228 let broadcast: f32x8 = _mm256_broadcastss_ps(a).as_f32x8();
        transmute(simd_select_bitmask(k, broadcast, f32x8::ZERO))
27230 }
27231}
27232
27233/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27234///
27235/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastss_ps&expand=573)
27236#[inline]
27237#[target_feature(enable = "avx512f,avx512vl")]
27238#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27239#[cfg_attr(test, assert_instr(vbroadcastss))]
27240pub fn _mm_mask_broadcastss_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
27241 unsafe {
27242 let broadcast: f32x4 = _mm_broadcastss_ps(a).as_f32x4();
        transmute(simd_select_bitmask(k, broadcast, src.as_f32x4()))
27244 }
27245}
27246
27247/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27248///
27249/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastss_ps&expand=574)
27250#[inline]
27251#[target_feature(enable = "avx512f,avx512vl")]
27252#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27253#[cfg_attr(test, assert_instr(vbroadcastss))]
27254pub fn _mm_maskz_broadcastss_ps(k: __mmask8, a: __m128) -> __m128 {
27255 unsafe {
27256 let broadcast: f32x4 = _mm_broadcastss_ps(a).as_f32x4();
        transmute(simd_select_bitmask(k, broadcast, f32x4::ZERO))
27258 }
27259}
27260
27261/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst.
27262///
27263/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastsd_pd&expand=567)
27264#[inline]
27265#[target_feature(enable = "avx512f")]
27266#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27267#[cfg_attr(test, assert_instr(vbroadcastsd))]
27268pub fn _mm512_broadcastsd_pd(a: __m128d) -> __m512d {
27269 unsafe { simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0]) }
27270}
27271
27272/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27273///
27274/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastsd_pd&expand=568)
27275#[inline]
27276#[target_feature(enable = "avx512f")]
27277#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27278#[cfg_attr(test, assert_instr(vbroadcastsd))]
27279pub fn _mm512_mask_broadcastsd_pd(src: __m512d, k: __mmask8, a: __m128d) -> __m512d {
27280 unsafe {
27281 let broadcast: f64x8 = _mm512_broadcastsd_pd(a).as_f64x8();
        transmute(simd_select_bitmask(k, broadcast, src.as_f64x8()))
27283 }
27284}
27285
27286/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27287///
27288/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastsd_pd&expand=569)
27289#[inline]
27290#[target_feature(enable = "avx512f")]
27291#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27292#[cfg_attr(test, assert_instr(vbroadcastsd))]
27293pub fn _mm512_maskz_broadcastsd_pd(k: __mmask8, a: __m128d) -> __m512d {
27294 unsafe {
27295 let broadcast: f64x8 = _mm512_broadcastsd_pd(a).as_f64x8();
        transmute(simd_select_bitmask(k, broadcast, f64x8::ZERO))
27297 }
27298}
27299
27300/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27301///
27302/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastsd_pd&expand=565)
27303#[inline]
27304#[target_feature(enable = "avx512f,avx512vl")]
27305#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27306#[cfg_attr(test, assert_instr(vbroadcastsd))]
27307pub fn _mm256_mask_broadcastsd_pd(src: __m256d, k: __mmask8, a: __m128d) -> __m256d {
27308 unsafe {
27309 let broadcast: f64x4 = _mm256_broadcastsd_pd(a).as_f64x4();
        transmute(simd_select_bitmask(k, broadcast, src.as_f64x4()))
27311 }
27312}
27313
27314/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27315///
27316/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastsd_pd&expand=566)
27317#[inline]
27318#[target_feature(enable = "avx512f,avx512vl")]
27319#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27320#[cfg_attr(test, assert_instr(vbroadcastsd))]
27321pub fn _mm256_maskz_broadcastsd_pd(k: __mmask8, a: __m128d) -> __m256d {
27322 unsafe {
27323 let broadcast: f64x4 = _mm256_broadcastsd_pd(a).as_f64x4();
        transmute(simd_select_bitmask(k, broadcast, f64x4::ZERO))
27325 }
27326}
27327
27328/// Broadcast the 4 packed 32-bit integers from a to all elements of dst.
27329///
27330/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_i32x4&expand=510)
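///
/// A minimal, illustrative sketch (the `demo` helper and its values are made
/// up, not part of Intel's documentation). It assumes a nightly toolchain with
/// `#![feature(stdarch_x86_avx512)]` and is marked `ignore`, so it is not
/// compiled as a doctest:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() -> [i32; 16] {
///     let a = _mm_setr_epi32(0, 1, 2, 3);
///     // The 128-bit source is repeated into all four 128-bit lanes.
///     let r = _mm512_broadcast_i32x4(a);
///     unsafe { core::mem::transmute(r) } // => [0, 1, 2, 3, 0, 1, 2, 3, ...]
/// }
/// ```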
27331#[inline]
27332#[target_feature(enable = "avx512f")] //msvc: vbroadcasti32x4, linux: vshuf
27333#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27334pub fn _mm512_broadcast_i32x4(a: __m128i) -> __m512i {
27335 unsafe {
27336 let a: i32x4 = a.as_i32x4();
27337 let ret: i32x16 = simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]);
        transmute(ret)
27339 }
27340}
27341
27342/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27343///
27344/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_i32x4&expand=511)
27345#[inline]
27346#[target_feature(enable = "avx512f")] //msvc: vbroadcasti32x4, linux: vshuf
27347#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27348pub fn _mm512_mask_broadcast_i32x4(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
27349 unsafe {
27350 let broadcast: i32x16 = _mm512_broadcast_i32x4(a).as_i32x16();
        transmute(simd_select_bitmask(k, broadcast, src.as_i32x16()))
27352 }
27353}
27354
27355/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27356///
27357/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_i32x4&expand=512)
27358#[inline]
27359#[target_feature(enable = "avx512f")] //msvc: vbroadcasti32x4, linux: vshuf
27360#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27361pub fn _mm512_maskz_broadcast_i32x4(k: __mmask16, a: __m128i) -> __m512i {
27362 unsafe {
27363 let broadcast: i32x16 = _mm512_broadcast_i32x4(a).as_i32x16();
        transmute(simd_select_bitmask(k, broadcast, i32x16::ZERO))
27365 }
27366}
27367
27368/// Broadcast the 4 packed 32-bit integers from a to all elements of dst.
27369///
27370/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_i32x4&expand=507)
27371#[inline]
27372#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcasti32x4, linux: vshuf
27373#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27374pub fn _mm256_broadcast_i32x4(a: __m128i) -> __m256i {
27375 unsafe {
27376 let a: i32x4 = a.as_i32x4();
27377 let ret: i32x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]);
        transmute(ret)
27379 }
27380}
27381
27382/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27383///
27384/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_i32x4&expand=508)
27385#[inline]
27386#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcasti32x4, linux: vshuf
27387#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27388pub fn _mm256_mask_broadcast_i32x4(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
27389 unsafe {
27390 let broadcast: i32x8 = _mm256_broadcast_i32x4(a).as_i32x8();
        transmute(simd_select_bitmask(k, broadcast, src.as_i32x8()))
27392 }
27393}
27394
27395/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27396///
27397/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_i32x4&expand=509)
27398#[inline]
27399#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcasti32x4, linux: vshuf
27400#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27401pub fn _mm256_maskz_broadcast_i32x4(k: __mmask8, a: __m128i) -> __m256i {
27402 unsafe {
27403 let broadcast: i32x8 = _mm256_broadcast_i32x4(a).as_i32x8();
        transmute(simd_select_bitmask(k, broadcast, i32x8::ZERO))
27405 }
27406}
27407
27408/// Broadcast the 4 packed 64-bit integers from a to all elements of dst.
27409///
27410/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_i64x4&expand=522)
27411#[inline]
27412#[target_feature(enable = "avx512f")] //msvc: vbroadcasti64x4, linux: vperm
27413#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27414pub fn _mm512_broadcast_i64x4(a: __m256i) -> __m512i {
27415 unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]) }
27416}
27417
27418/// Broadcast the 4 packed 64-bit integers from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27419///
27420/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_i64x4&expand=523)
27421#[inline]
27422#[target_feature(enable = "avx512f")] //msvc: vbroadcasti64x4, linux: vperm
27423#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27424pub fn _mm512_mask_broadcast_i64x4(src: __m512i, k: __mmask8, a: __m256i) -> __m512i {
27425 unsafe {
27426 let broadcast: i64x8 = _mm512_broadcast_i64x4(a).as_i64x8();
        transmute(simd_select_bitmask(k, broadcast, src.as_i64x8()))
27428 }
27429}
27430
27431/// Broadcast the 4 packed 64-bit integers from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27432///
27433/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_i64x4&expand=524)
27434#[inline]
27435#[target_feature(enable = "avx512f")] //msvc: vbroadcasti64x4, linux: vperm
27436#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27437pub fn _mm512_maskz_broadcast_i64x4(k: __mmask8, a: __m256i) -> __m512i {
27438 unsafe {
27439 let broadcast: i64x8 = _mm512_broadcast_i64x4(a).as_i64x8();
        transmute(simd_select_bitmask(k, broadcast, i64x8::ZERO))
27441 }
27442}
27443
27444/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst.
27445///
27446/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_f32x4&expand=483)
27447#[inline]
27448#[target_feature(enable = "avx512f")] //msvc: vbroadcastf32x4, linux: vshuf
27449#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27450pub fn _mm512_broadcast_f32x4(a: __m128) -> __m512 {
27451 unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]) }
27452}
27453
27454/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27455///
27456/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_f32x4&expand=484)
27457#[inline]
#[target_feature(enable = "avx512f")] //msvc: vbroadcastf32x4, linux: vshuf
27459#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27460pub fn _mm512_mask_broadcast_f32x4(src: __m512, k: __mmask16, a: __m128) -> __m512 {
27461 unsafe {
27462 let broadcast: f32x16 = _mm512_broadcast_f32x4(a).as_f32x16();
        transmute(simd_select_bitmask(k, broadcast, src.as_f32x16()))
27464 }
27465}
27466
27467/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27468///
27469/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_f32x4&expand=485)
27470#[inline]
#[target_feature(enable = "avx512f")] //msvc: vbroadcastf32x4, linux: vshuf
27472#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27473pub fn _mm512_maskz_broadcast_f32x4(k: __mmask16, a: __m128) -> __m512 {
27474 unsafe {
27475 let broadcast: f32x16 = _mm512_broadcast_f32x4(a).as_f32x16();
        transmute(simd_select_bitmask(k, broadcast, f32x16::ZERO))
27477 }
27478}
27479
27480/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst.
27481///
27482/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_f32x4&expand=480)
27483#[inline]
27484#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcastf32x4, linux: vshuf
27485#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27486pub fn _mm256_broadcast_f32x4(a: __m128) -> __m256 {
27487 unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]) }
27488}
27489
27490/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27491///
27492/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_f32x4&expand=481)
27493#[inline]
#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcastf32x4, linux: vshuf
27495#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27496pub fn _mm256_mask_broadcast_f32x4(src: __m256, k: __mmask8, a: __m128) -> __m256 {
27497 unsafe {
27498 let broadcast: f32x8 = _mm256_broadcast_f32x4(a).as_f32x8();
        transmute(simd_select_bitmask(k, broadcast, src.as_f32x8()))
27500 }
27501}
27502
27503/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27504///
27505/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_f32x4&expand=482)
27506#[inline]
#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcastf32x4, linux: vshuf
27508#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27509pub fn _mm256_maskz_broadcast_f32x4(k: __mmask8, a: __m128) -> __m256 {
27510 unsafe {
27511 let broadcast: f32x8 = _mm256_broadcast_f32x4(a).as_f32x8();
        transmute(simd_select_bitmask(k, broadcast, f32x8::ZERO))
27513 }
27514}
27515
27516/// Broadcast the 4 packed double-precision (64-bit) floating-point elements from a to all elements of dst.
27517///
27518/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_f64x4&expand=495)
27519#[inline]
27520#[target_feature(enable = "avx512f")] //msvc: vbroadcastf64x4, linux: vperm
27521#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27522pub fn _mm512_broadcast_f64x4(a: __m256d) -> __m512d {
27523 unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]) }
27524}
27525
27526/// Broadcast the 4 packed double-precision (64-bit) floating-point elements from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27527///
27528/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_f64x4&expand=496)
27529#[inline]
#[target_feature(enable = "avx512f")] //msvc: vbroadcastf64x4, linux: vperm
27531#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27532pub fn _mm512_mask_broadcast_f64x4(src: __m512d, k: __mmask8, a: __m256d) -> __m512d {
27533 unsafe {
27534 let broadcast: f64x8 = _mm512_broadcast_f64x4(a).as_f64x8();
        transmute(simd_select_bitmask(k, broadcast, src.as_f64x8()))
27536 }
27537}
27538
27539/// Broadcast the 4 packed double-precision (64-bit) floating-point elements from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27540///
27541/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_f64x4&expand=497)
27542#[inline]
#[target_feature(enable = "avx512f")] //msvc: vbroadcastf64x4, linux: vperm
27544#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27545pub fn _mm512_maskz_broadcast_f64x4(k: __mmask8, a: __m256d) -> __m512d {
27546 unsafe {
27547 let broadcast: f64x8 = _mm512_broadcast_f64x4(a).as_f64x8();
        transmute(simd_select_bitmask(k, broadcast, f64x8::ZERO))
27549 }
27550}
27551
27552/// Blend packed 32-bit integers from a and b using control mask k, and store the results in dst.
27553///
27554/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_epi32&expand=435)
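///
/// A minimal, illustrative sketch (the `demo` helper and its values are made
/// up, not part of Intel's documentation). It assumes a nightly toolchain with
/// `#![feature(stdarch_x86_avx512)]` and is marked `ignore`, so it is not
/// compiled as a doctest:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() -> __m512i {
///     let a = _mm512_set1_epi32(0);
///     let b = _mm512_set1_epi32(1);
///     // Bit i of the mask selects lane i: set bits take the lane from `b`,
///     // clear bits take it from `a`.
///     _mm512_mask_blend_epi32(0xFF00, a, b) // lanes 0..8 from a, lanes 8..16 from b
/// }
/// ```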
27555#[inline]
27556#[target_feature(enable = "avx512f")]
27557#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27558#[cfg_attr(test, assert_instr(vmovdqa32))] //should be vpblendmd
27559pub fn _mm512_mask_blend_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_select_bitmask(k, b.as_i32x16(), a.as_i32x16())) }
27561}
27562
27563/// Blend packed 32-bit integers from a and b using control mask k, and store the results in dst.
27564///
27565/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_epi32&expand=434)
27566#[inline]
27567#[target_feature(enable = "avx512f,avx512vl")]
27568#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27569#[cfg_attr(test, assert_instr(vmovdqa32))] //should be vpblendmd
27570pub fn _mm256_mask_blend_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_select_bitmask(k, b.as_i32x8(), a.as_i32x8())) }
27572}
27573
27574/// Blend packed 32-bit integers from a and b using control mask k, and store the results in dst.
27575///
27576/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_epi32&expand=432)
27577#[inline]
27578#[target_feature(enable = "avx512f,avx512vl")]
27579#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27580#[cfg_attr(test, assert_instr(vmovdqa32))] //should be vpblendmd
27581pub fn _mm_mask_blend_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_select_bitmask(k, b.as_i32x4(), a.as_i32x4())) }
27583}
27584
27585/// Blend packed 64-bit integers from a and b using control mask k, and store the results in dst.
27586///
27587/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_epi64&expand=438)
27588#[inline]
27589#[target_feature(enable = "avx512f")]
27590#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27591#[cfg_attr(test, assert_instr(vmovdqa64))] //should be vpblendmq
27592pub fn _mm512_mask_blend_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_select_bitmask(k, b.as_i64x8(), a.as_i64x8())) }
27594}
27595
27596/// Blend packed 64-bit integers from a and b using control mask k, and store the results in dst.
27597///
27598/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_epi64&expand=437)
27599#[inline]
27600#[target_feature(enable = "avx512f,avx512vl")]
27601#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27602#[cfg_attr(test, assert_instr(vmovdqa64))] //should be vpblendmq
27603pub fn _mm256_mask_blend_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_select_bitmask(k, b.as_i64x4(), a.as_i64x4())) }
27605}
27606
27607/// Blend packed 64-bit integers from a and b using control mask k, and store the results in dst.
27608///
27609/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_epi64&expand=436)
27610#[inline]
27611#[target_feature(enable = "avx512f,avx512vl")]
27612#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27613#[cfg_attr(test, assert_instr(vmovdqa64))] //should be vpblendmq
27614pub fn _mm_mask_blend_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_select_bitmask(k, b.as_i64x2(), a.as_i64x2())) }
27616}
27617
27618/// Blend packed single-precision (32-bit) floating-point elements from a and b using control mask k, and store the results in dst.
27619///
27620/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_ps&expand=451)
27621#[inline]
27622#[target_feature(enable = "avx512f")]
27623#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27624#[cfg_attr(test, assert_instr(vmovaps))] //should be vpblendmps
27625pub fn _mm512_mask_blend_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe { transmute(simd_select_bitmask(k, b.as_f32x16(), a.as_f32x16())) }
27627}
27628
27629/// Blend packed single-precision (32-bit) floating-point elements from a and b using control mask k, and store the results in dst.
27630///
27631/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_ps&expand=450)
27632#[inline]
27633#[target_feature(enable = "avx512f,avx512vl")]
27634#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27635#[cfg_attr(test, assert_instr(vmovaps))] //should be vpblendmps
27636pub fn _mm256_mask_blend_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe { transmute(simd_select_bitmask(k, b.as_f32x8(), a.as_f32x8())) }
27638}
27639
27640/// Blend packed single-precision (32-bit) floating-point elements from a and b using control mask k, and store the results in dst.
27641///
27642/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_ps&expand=448)
27643#[inline]
27644#[target_feature(enable = "avx512f,avx512vl")]
27645#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27646#[cfg_attr(test, assert_instr(vmovaps))] //should be vpblendmps
27647pub fn _mm_mask_blend_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe { transmute(simd_select_bitmask(k, b.as_f32x4(), a.as_f32x4())) }
27649}
27650
27651/// Blend packed double-precision (64-bit) floating-point elements from a and b using control mask k, and store the results in dst.
27652///
27653/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_pd&expand=446)
27654#[inline]
27655#[target_feature(enable = "avx512f")]
27656#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27657#[cfg_attr(test, assert_instr(vmovapd))] //should be vpblendmpd
27658pub fn _mm512_mask_blend_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe { transmute(simd_select_bitmask(k, b.as_f64x8(), a.as_f64x8())) }
27660}
27661
27662/// Blend packed double-precision (64-bit) floating-point elements from a and b using control mask k, and store the results in dst.
27663///
27664/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_pd&expand=445)
27665#[inline]
27666#[target_feature(enable = "avx512f,avx512vl")]
27667#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27668#[cfg_attr(test, assert_instr(vmovapd))] //should be vpblendmpd
27669pub fn _mm256_mask_blend_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe { transmute(simd_select_bitmask(k, b.as_f64x4(), a.as_f64x4())) }
27671}
27672
27673/// Blend packed double-precision (64-bit) floating-point elements from a and b using control mask k, and store the results in dst.
27674///
27675/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_pd&expand=443)
27676#[inline]
27677#[target_feature(enable = "avx512f,avx512vl")]
27678#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27679#[cfg_attr(test, assert_instr(vmovapd))] //should be vpblendmpd
27680pub fn _mm_mask_blend_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe { transmute(simd_select_bitmask(k, b.as_f64x2(), a.as_f64x2())) }
27682}
27683
27684/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 64 bytes (16 elements) in dst.
27685///
/// <div class="warning">Only the lowest <strong>4 bits</strong> of the immediate IMM8 are used (shift at maximum by 15 elements, i.e. 60 bytes)!</div>
27687///
27688/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_alignr_epi32&expand=245)
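///
/// A minimal, illustrative sketch (the `demo` helper and its values are made
/// up, not part of Intel's documentation). It assumes a nightly toolchain with
/// `#![feature(stdarch_x86_avx512)]` and is marked `ignore`, so it is not
/// compiled as a doctest:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() -> [i32; 16] {
///     let a = _mm512_setr_epi32(16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
///     let b = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
///     // Conceptually [a:b] is a 32-element vector with `b` in the low half;
///     // shifting right by 3 elements keeps elements 3..19 of that vector.
///     let r = _mm512_alignr_epi32::<3>(a, b);
///     unsafe { core::mem::transmute(r) } // => [3, 4, ..., 15, 16, 17, 18]
/// }
/// ```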
27689#[inline]
27690#[target_feature(enable = "avx512f")]
27691#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27692#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27693#[rustc_legacy_const_generics(2)]
27694pub fn _mm512_alignr_epi32<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
27695 unsafe {
27696 static_assert_uimm_bits!(IMM8, 8);
27697 let a = a.as_i32x16();
27698 let b = b.as_i32x16();
27699 let imm8: i32 = IMM8 % 16;
27700 let r: i32x16 = match imm8 {
27701 0 => simd_shuffle!(
27702 a,
27703 b,
27704 [
27705 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
27706 ],
27707 ),
27708 1 => simd_shuffle!(
27709 a,
27710 b,
27711 [
27712 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0,
27713 ],
27714 ),
27715 2 => simd_shuffle!(
27716 a,
27717 b,
27718 [18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1],
27719 ),
27720 3 => simd_shuffle!(
27721 a,
27722 b,
27723 [19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2],
27724 ),
27725 4 => simd_shuffle!(
27726 a,
27727 b,
27728 [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3],
27729 ),
27730 5 => simd_shuffle!(
27731 a,
27732 b,
27733 [21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4],
27734 ),
27735 6 => simd_shuffle!(
27736 a,
27737 b,
27738 [22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5],
27739 ),
27740 7 => simd_shuffle!(
27741 a,
27742 b,
27743 [23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6],
27744 ),
27745 8 => simd_shuffle!(
27746 a,
27747 b,
27748 [24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7],
27749 ),
27750 9 => simd_shuffle!(
27751 a,
27752 b,
27753 [25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8],
27754 ),
27755 10 => simd_shuffle!(a, b, [26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
27756 11 => simd_shuffle!(a, b, [27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]),
27757 12 => simd_shuffle!(a, b, [28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]),
27758 13 => simd_shuffle!(a, b, [29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]),
27759 14 => simd_shuffle!(a, b, [30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]),
27760 15 => simd_shuffle!(a, b, [31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]),
27761 _ => unreachable_unchecked(),
27762 };
27763 transmute(r)
27764 }
27765}
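
// Illustrative sketch, not part of the original source: for IMM8 = 1 the concatenation
// `a:b` (with `a` in the upper half) is shifted right by one 32-bit element, so the result
// is b[1..=15] followed by a[0]. The helper name and values are ours; compiled only for
// tests and assumes AVX-512F at runtime.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_alignr_epi32() {
    unsafe {
        let a = _mm512_setr_epi32(
            100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115,
        );
        let b = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let lanes: [i32; 16] = mem::transmute(_mm512_alignr_epi32::<1>(a, b));
        assert_eq!(lanes, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 100]);
    }
}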
27766
27767/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 64 bytes (16 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27768///
27769/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_alignr_epi32&expand=246)
27770#[inline]
27771#[target_feature(enable = "avx512f")]
27772#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27773#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27774#[rustc_legacy_const_generics(4)]
27775pub fn _mm512_mask_alignr_epi32<const IMM8: i32>(
27776 src: __m512i,
27777 k: __mmask16,
27778 a: __m512i,
27779 b: __m512i,
27780) -> __m512i {
27781 unsafe {
27782 static_assert_uimm_bits!(IMM8, 8);
27783 let r: __m512i = _mm512_alignr_epi32::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16()))
27785 }
27786}
27787
/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 64 bytes (16 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27789///
27790/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_alignr_epi32&expand=247)
27791#[inline]
27792#[target_feature(enable = "avx512f")]
27793#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27794#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27795#[rustc_legacy_const_generics(3)]
27796pub fn _mm512_maskz_alignr_epi32<const IMM8: i32>(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
27797 unsafe {
27798 static_assert_uimm_bits!(IMM8, 8);
27799 let r: __m512i = _mm512_alignr_epi32::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO))
27801 }
27802}
27803
27804/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 32 bytes (8 elements) in dst.
27805///
/// <div class="warning">Only the lowest <strong>3 bits</strong> of imm8 are used (shift by at most 28 bytes)!</div>
27807///
27808/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_alignr_epi32&expand=242)
27809#[inline]
27810#[target_feature(enable = "avx512f,avx512vl")]
27811#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27812#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27813#[rustc_legacy_const_generics(2)]
27814pub fn _mm256_alignr_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
27815 unsafe {
27816 static_assert_uimm_bits!(IMM8, 8);
27817 let a: i32x8 = a.as_i32x8();
27818 let b: i32x8 = b.as_i32x8();
27819 let imm8: i32 = IMM8 % 8;
27820 let r: i32x8 = match imm8 {
27821 0 => simd_shuffle!(a, b, [8, 9, 10, 11, 12, 13, 14, 15]),
27822 1 => simd_shuffle!(a, b, [9, 10, 11, 12, 13, 14, 15, 0]),
27823 2 => simd_shuffle!(a, b, [10, 11, 12, 13, 14, 15, 0, 1]),
27824 3 => simd_shuffle!(a, b, [11, 12, 13, 14, 15, 0, 1, 2]),
27825 4 => simd_shuffle!(a, b, [12, 13, 14, 15, 0, 1, 2, 3]),
27826 5 => simd_shuffle!(a, b, [13, 14, 15, 0, 1, 2, 3, 4]),
27827 6 => simd_shuffle!(a, b, [14, 15, 0, 1, 2, 3, 4, 5]),
27828 7 => simd_shuffle!(a, b, [15, 0, 1, 2, 3, 4, 5, 6]),
27829 _ => unreachable_unchecked(),
27830 };
        transmute(r)
27832 }
27833}
27834
27835/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 32 bytes (8 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27836///
27837/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_alignr_epi32&expand=243)
27838#[inline]
27839#[target_feature(enable = "avx512f,avx512vl")]
27840#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27841#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27842#[rustc_legacy_const_generics(4)]
27843pub fn _mm256_mask_alignr_epi32<const IMM8: i32>(
27844 src: __m256i,
27845 k: __mmask8,
27846 a: __m256i,
27847 b: __m256i,
27848) -> __m256i {
27849 unsafe {
27850 static_assert_uimm_bits!(IMM8, 8);
27851 let r: __m256i = _mm256_alignr_epi32::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8()))
27853 }
27854}
27855
27856/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 32 bytes (8 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27857///
27858/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_alignr_epi32&expand=244)
27859#[inline]
27860#[target_feature(enable = "avx512f,avx512vl")]
27861#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27862#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27863#[rustc_legacy_const_generics(3)]
27864pub fn _mm256_maskz_alignr_epi32<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
27865 unsafe {
27866 static_assert_uimm_bits!(IMM8, 8);
27867 let r: __m256i = _mm256_alignr_epi32::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO))
27869 }
27870}
27871
27872/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 16 bytes (4 elements) in dst.
27873///
/// <div class="warning">Only the lowest <strong>2 bits</strong> of imm8 are used (shift by at most 12 bytes)!</div>
27875///
27876/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_alignr_epi32&expand=239)
27877#[inline]
27878#[target_feature(enable = "avx512f,avx512vl")]
27879#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27880#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))] //should be valignd
27881#[rustc_legacy_const_generics(2)]
27882pub fn _mm_alignr_epi32<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
27883 unsafe {
27884 static_assert_uimm_bits!(IMM8, 8);
27885 let a: i32x4 = a.as_i32x4();
27886 let b: i32x4 = b.as_i32x4();
27887 let imm8: i32 = IMM8 % 4;
27888 let r: i32x4 = match imm8 {
27889 0 => simd_shuffle!(a, b, [4, 5, 6, 7]),
27890 1 => simd_shuffle!(a, b, [5, 6, 7, 0]),
27891 2 => simd_shuffle!(a, b, [6, 7, 0, 1]),
27892 3 => simd_shuffle!(a, b, [7, 0, 1, 2]),
27893 _ => unreachable_unchecked(),
27894 };
        transmute(r)
27896 }
27897}
27898
27899/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 16 bytes (4 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27900///
27901/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_alignr_epi32&expand=240)
27902#[inline]
27903#[target_feature(enable = "avx512f,avx512vl")]
27904#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27905#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27906#[rustc_legacy_const_generics(4)]
27907pub fn _mm_mask_alignr_epi32<const IMM8: i32>(
27908 src: __m128i,
27909 k: __mmask8,
27910 a: __m128i,
27911 b: __m128i,
27912) -> __m128i {
27913 unsafe {
27914 static_assert_uimm_bits!(IMM8, 8);
27915 let r: __m128i = _mm_alignr_epi32::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4()))
27917 }
27918}
27919
27920/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 16 bytes (4 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27921///
27922/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_alignr_epi32&expand=241)
27923#[inline]
27924#[target_feature(enable = "avx512f,avx512vl")]
27925#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27926#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27927#[rustc_legacy_const_generics(3)]
27928pub fn _mm_maskz_alignr_epi32<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
27929 unsafe {
27930 static_assert_uimm_bits!(IMM8, 8);
27931 let r: __m128i = _mm_alignr_epi32::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO))
27933 }
27934}
27935
27936/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 64 bytes (8 elements) in dst.
27937///
/// <div class="warning">Only the lowest <strong>3 bits</strong> of imm8 are used (shift by at most 56 bytes)!</div>
27939///
27940/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_alignr_epi64&expand=254)
27941#[inline]
27942#[target_feature(enable = "avx512f")]
27943#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27944#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
27945#[rustc_legacy_const_generics(2)]
27946pub fn _mm512_alignr_epi64<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
27947 unsafe {
27948 static_assert_uimm_bits!(IMM8, 8);
27949 let imm8: i32 = IMM8 % 8;
27950 let r: i64x8 = match imm8 {
27951 0 => simd_shuffle!(a, b, [8, 9, 10, 11, 12, 13, 14, 15]),
27952 1 => simd_shuffle!(a, b, [9, 10, 11, 12, 13, 14, 15, 0]),
27953 2 => simd_shuffle!(a, b, [10, 11, 12, 13, 14, 15, 0, 1]),
27954 3 => simd_shuffle!(a, b, [11, 12, 13, 14, 15, 0, 1, 2]),
27955 4 => simd_shuffle!(a, b, [12, 13, 14, 15, 0, 1, 2, 3]),
27956 5 => simd_shuffle!(a, b, [13, 14, 15, 0, 1, 2, 3, 4]),
27957 6 => simd_shuffle!(a, b, [14, 15, 0, 1, 2, 3, 4, 5]),
27958 7 => simd_shuffle!(a, b, [15, 0, 1, 2, 3, 4, 5, 6]),
27959 _ => unreachable_unchecked(),
27960 };
        transmute(r)
27962 }
27963}
27964
27965/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 64 bytes (8 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27966///
27967/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_alignr_epi64&expand=255)
27968#[inline]
27969#[target_feature(enable = "avx512f")]
27970#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27971#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
27972#[rustc_legacy_const_generics(4)]
27973pub fn _mm512_mask_alignr_epi64<const IMM8: i32>(
27974 src: __m512i,
27975 k: __mmask8,
27976 a: __m512i,
27977 b: __m512i,
27978) -> __m512i {
27979 unsafe {
27980 static_assert_uimm_bits!(IMM8, 8);
27981 let r: __m512i = _mm512_alignr_epi64::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8()))
27983 }
27984}
27985
/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 64 bytes (8 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27987///
27988/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_alignr_epi64&expand=256)
27989#[inline]
27990#[target_feature(enable = "avx512f")]
27991#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27992#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
27993#[rustc_legacy_const_generics(3)]
27994pub fn _mm512_maskz_alignr_epi64<const IMM8: i32>(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
27995 unsafe {
27996 static_assert_uimm_bits!(IMM8, 8);
27997 let r: __m512i = _mm512_alignr_epi64::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO))
27999 }
28000}
28001
28002/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 32 bytes (4 elements) in dst.
28003///
/// <div class="warning">Only the lowest <strong>2 bits</strong> of imm8 are used (shift by at most 24 bytes)!</div>
28005///
28006/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_alignr_epi64&expand=251)
28007#[inline]
28008#[target_feature(enable = "avx512f,avx512vl")]
28009#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28010#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
28011#[rustc_legacy_const_generics(2)]
28012pub fn _mm256_alignr_epi64<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
28013 unsafe {
28014 static_assert_uimm_bits!(IMM8, 8);
28015 let imm8: i32 = IMM8 % 4;
28016 let r: i64x4 = match imm8 {
28017 0 => simd_shuffle!(a, b, [4, 5, 6, 7]),
28018 1 => simd_shuffle!(a, b, [5, 6, 7, 0]),
28019 2 => simd_shuffle!(a, b, [6, 7, 0, 1]),
28020 3 => simd_shuffle!(a, b, [7, 0, 1, 2]),
28021 _ => unreachable_unchecked(),
28022 };
        transmute(r)
28024 }
28025}
28026
28027/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 32 bytes (4 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28028///
28029/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_alignr_epi64&expand=252)
28030#[inline]
28031#[target_feature(enable = "avx512f,avx512vl")]
28032#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28033#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
28034#[rustc_legacy_const_generics(4)]
28035pub fn _mm256_mask_alignr_epi64<const IMM8: i32>(
28036 src: __m256i,
28037 k: __mmask8,
28038 a: __m256i,
28039 b: __m256i,
28040) -> __m256i {
28041 unsafe {
28042 static_assert_uimm_bits!(IMM8, 8);
28043 let r: __m256i = _mm256_alignr_epi64::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4()))
28045 }
28046}
28047
28048/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 32 bytes (4 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28049///
28050/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_alignr_epi64&expand=253)
28051#[inline]
28052#[target_feature(enable = "avx512f,avx512vl")]
28053#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28054#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
28055#[rustc_legacy_const_generics(3)]
28056pub fn _mm256_maskz_alignr_epi64<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28057 unsafe {
28058 static_assert_uimm_bits!(IMM8, 8);
28059 let r: __m256i = _mm256_alignr_epi64::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO))
28061 }
28062}
28063
28064/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 16 bytes (2 elements) in dst.
28065///
/// <div class="warning">Only the lowest <strong>bit</strong> of imm8 is used (shift by at most 8 bytes)!</div>
28067///
28068/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_alignr_epi64&expand=248)
28069#[inline]
28070#[target_feature(enable = "avx512f,avx512vl")]
28071#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28072#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))] //should be valignq
28073#[rustc_legacy_const_generics(2)]
28074pub fn _mm_alignr_epi64<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
28075 unsafe {
28076 static_assert_uimm_bits!(IMM8, 8);
28077 let imm8: i32 = IMM8 % 2;
28078 let r: i64x2 = match imm8 {
28079 0 => simd_shuffle!(a, b, [2, 3]),
28080 1 => simd_shuffle!(a, b, [3, 0]),
28081 _ => unreachable_unchecked(),
28082 };
        transmute(r)
28084 }
28085}
28086
28087/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 16 bytes (2 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28088///
28089/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_alignr_epi64&expand=249)
28090#[inline]
28091#[target_feature(enable = "avx512f,avx512vl")]
28092#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28093#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
28094#[rustc_legacy_const_generics(4)]
28095pub fn _mm_mask_alignr_epi64<const IMM8: i32>(
28096 src: __m128i,
28097 k: __mmask8,
28098 a: __m128i,
28099 b: __m128i,
28100) -> __m128i {
28101 unsafe {
28102 static_assert_uimm_bits!(IMM8, 8);
28103 let r: __m128i = _mm_alignr_epi64::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x2(), src.as_i64x2()))
28105 }
28106}
28107
28108/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 16 bytes (2 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28109///
28110/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_alignr_epi64&expand=250)
28111#[inline]
28112#[target_feature(enable = "avx512f,avx512vl")]
28113#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28114#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
28115#[rustc_legacy_const_generics(3)]
28116pub fn _mm_maskz_alignr_epi64<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28117 unsafe {
28118 static_assert_uimm_bits!(IMM8, 8);
28119 let r: __m128i = _mm_alignr_epi64::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x2(), i64x2::ZERO))
28121 }
28122}
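
// Illustrative sketch, not part of the original source: because `_mm256_alignr_epi64` only
// uses IMM8 % 4, an immediate of 5 behaves exactly like 1, yielding b[1..=3] followed by
// a[0]. The helper name and values are ours; compiled only for tests and assumes AVX-512F
// and AVX-512VL at runtime.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn example_alignr_epi64_wraps() {
    unsafe {
        let a = _mm256_setr_epi64x(100, 101, 102, 103);
        let b = _mm256_setr_epi64x(0, 1, 2, 3);
        let by_one: [i64; 4] = mem::transmute(_mm256_alignr_epi64::<1>(a, b));
        let by_five: [i64; 4] = mem::transmute(_mm256_alignr_epi64::<5>(a, b));
        assert_eq!(by_one, [1, 2, 3, 100]);
        assert_eq!(by_one, by_five);
    }
}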
28123
28124/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst.
28125///
28126/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_epi32&expand=272)
28127#[inline]
28128#[target_feature(enable = "avx512f")]
28129#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpandq))] // should be vpandd, but the compiler generates vpandq
28131pub fn _mm512_and_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_and(a.as_i32x16(), b.as_i32x16())) }
28133}
28134
28135/// Performs element-by-element bitwise AND between packed 32-bit integer elements of a and b, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28136///
28137/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_and_epi32&expand=273)
28138#[inline]
28139#[target_feature(enable = "avx512f")]
28140#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28141#[cfg_attr(test, assert_instr(vpandd))]
28142pub fn _mm512_mask_and_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28143 unsafe {
28144 let and: i32x16 = _mm512_and_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, and, src.as_i32x16()))
28146 }
28147}
28148
28149/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28150///
28151/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_and_epi32&expand=274)
28152#[inline]
28153#[target_feature(enable = "avx512f")]
28154#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28155#[cfg_attr(test, assert_instr(vpandd))]
28156pub fn _mm512_maskz_and_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28157 unsafe {
28158 let and: i32x16 = _mm512_and_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, and, i32x16::ZERO))
28160 }
28161}
28162
28163/// Performs element-by-element bitwise AND between packed 32-bit integer elements of a and b, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28164///
28165/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_and_epi32&expand=270)
28166#[inline]
28167#[target_feature(enable = "avx512f,avx512vl")]
28168#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28169#[cfg_attr(test, assert_instr(vpandd))]
28170pub fn _mm256_mask_and_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28171 unsafe {
        let and: i32x8 = simd_and(a.as_i32x8(), b.as_i32x8());
        transmute(simd_select_bitmask(k, and, src.as_i32x8()))
28174 }
28175}
28176
28177/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28178///
28179/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_and_epi32&expand=271)
28180#[inline]
28181#[target_feature(enable = "avx512f,avx512vl")]
28182#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28183#[cfg_attr(test, assert_instr(vpandd))]
28184pub fn _mm256_maskz_and_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28185 unsafe {
        let and: i32x8 = simd_and(a.as_i32x8(), b.as_i32x8());
        transmute(simd_select_bitmask(k, and, i32x8::ZERO))
28188 }
28189}
28190
28191/// Performs element-by-element bitwise AND between packed 32-bit integer elements of a and b, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28192///
28193/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_and_epi32&expand=268)
28194#[inline]
28195#[target_feature(enable = "avx512f,avx512vl")]
28196#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28197#[cfg_attr(test, assert_instr(vpandd))]
28198pub fn _mm_mask_and_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28199 unsafe {
        let and: i32x4 = simd_and(a.as_i32x4(), b.as_i32x4());
        transmute(simd_select_bitmask(k, and, src.as_i32x4()))
28202 }
28203}
28204
28205/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28206///
28207/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_and_epi32&expand=269)
28208#[inline]
28209#[target_feature(enable = "avx512f,avx512vl")]
28210#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28211#[cfg_attr(test, assert_instr(vpandd))]
28212pub fn _mm_maskz_and_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28213 unsafe {
        let and: i32x4 = simd_and(a.as_i32x4(), b.as_i32x4());
        transmute(simd_select_bitmask(k, and, i32x4::ZERO))
28216 }
28217}
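
// Illustrative sketch, not part of the original source: it contrasts the writemask and
// zeromask forms of the masked AND. Where a mask bit is clear, `_mm512_mask_and_epi32`
// keeps the lane from `src`, while `_mm512_maskz_and_epi32` zeroes it. The helper name and
// values are ours; compiled only for tests and assumes AVX-512F at runtime.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_masked_and_epi32() {
    unsafe {
        let src = _mm512_set1_epi32(-1);
        let a = _mm512_set1_epi32(0b1100);
        let b = _mm512_set1_epi32(0b1010);
        let k: __mmask16 = 0b0000_0000_1111_1111; // only the low 8 lanes receive a & b
        let masked: [i32; 16] = mem::transmute(_mm512_mask_and_epi32(src, k, a, b));
        let zeroed: [i32; 16] = mem::transmute(_mm512_maskz_and_epi32(k, a, b));
        assert_eq!(masked[0], 0b1000); // 0b1100 & 0b1010
        assert_eq!(masked[15], -1); // copied from src
        assert_eq!(zeroed[15], 0); // zeroed out
    }
}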
28218
28219/// Compute the bitwise AND of 512 bits (composed of packed 64-bit integers) in a and b, and store the results in dst.
28220///
28221/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_epi64&expand=279)
28222#[inline]
28223#[target_feature(enable = "avx512f")]
28224#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28225#[cfg_attr(test, assert_instr(vpandq))]
28226pub fn _mm512_and_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_and(a.as_i64x8(), b.as_i64x8())) }
28228}
28229
28230/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28231///
28232/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_and_epi64&expand=280)
28233#[inline]
28234#[target_feature(enable = "avx512f")]
28235#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28236#[cfg_attr(test, assert_instr(vpandq))]
28237pub fn _mm512_mask_and_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28238 unsafe {
28239 let and: i64x8 = _mm512_and_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, and, src.as_i64x8()))
28241 }
28242}
28243
28244/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28245///
28246/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_and_epi64&expand=281)
28247#[inline]
28248#[target_feature(enable = "avx512f")]
28249#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28250#[cfg_attr(test, assert_instr(vpandq))]
28251pub fn _mm512_maskz_and_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28252 unsafe {
28253 let and: i64x8 = _mm512_and_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, and, i64x8::ZERO))
28255 }
28256}
28257
28258/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28259///
28260/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_and_epi64&expand=277)
28261#[inline]
28262#[target_feature(enable = "avx512f,avx512vl")]
28263#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28264#[cfg_attr(test, assert_instr(vpandq))]
28265pub fn _mm256_mask_and_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28266 unsafe {
        let and: i64x4 = simd_and(a.as_i64x4(), b.as_i64x4());
        transmute(simd_select_bitmask(k, and, src.as_i64x4()))
28269 }
28270}
28271
28272/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28273///
28274/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_and_epi64&expand=278)
28275#[inline]
28276#[target_feature(enable = "avx512f,avx512vl")]
28277#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28278#[cfg_attr(test, assert_instr(vpandq))]
28279pub fn _mm256_maskz_and_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28280 unsafe {
        let and: i64x4 = simd_and(a.as_i64x4(), b.as_i64x4());
        transmute(simd_select_bitmask(k, and, i64x4::ZERO))
28283 }
28284}
28285
28286/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28287///
28288/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_and_epi64&expand=275)
28289#[inline]
28290#[target_feature(enable = "avx512f,avx512vl")]
28291#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28292#[cfg_attr(test, assert_instr(vpandq))]
28293pub fn _mm_mask_and_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28294 unsafe {
        let and: i64x2 = simd_and(a.as_i64x2(), b.as_i64x2());
        transmute(simd_select_bitmask(k, and, src.as_i64x2()))
28297 }
28298}
28299
28300/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28301///
28302/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_and_epi64&expand=276)
28303#[inline]
28304#[target_feature(enable = "avx512f,avx512vl")]
28305#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28306#[cfg_attr(test, assert_instr(vpandq))]
28307pub fn _mm_maskz_and_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28308 unsafe {
        let and: i64x2 = simd_and(a.as_i64x2(), b.as_i64x2());
        transmute(simd_select_bitmask(k, and, i64x2::ZERO))
28311 }
28312}
28313
28314/// Compute the bitwise AND of 512 bits (representing integer data) in a and b, and store the result in dst.
28315///
28316/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_si512&expand=302)
28317#[inline]
28318#[target_feature(enable = "avx512f")]
28319#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28320#[cfg_attr(test, assert_instr(vpandq))]
28321pub fn _mm512_and_si512(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_and(a.as_i32x16(), b.as_i32x16())) }
28323}
28324
28325/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst.
28326///
28327/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_epi32&expand=4042)
28328#[inline]
28329#[target_feature(enable = "avx512f")]
28330#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28331#[cfg_attr(test, assert_instr(vporq))]
28332pub fn _mm512_or_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_or(a.as_i32x16(), b.as_i32x16())) }
28334}
28335
28336/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28337///
28338/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_or_epi32&expand=4040)
28339#[inline]
28340#[target_feature(enable = "avx512f")]
28341#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28342#[cfg_attr(test, assert_instr(vpord))]
28343pub fn _mm512_mask_or_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28344 unsafe {
28345 let or: i32x16 = _mm512_or_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, or, src.as_i32x16()))
28347 }
28348}
28349
28350/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28351///
28352/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_or_epi32&expand=4041)
28353#[inline]
28354#[target_feature(enable = "avx512f")]
28355#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28356#[cfg_attr(test, assert_instr(vpord))]
28357pub fn _mm512_maskz_or_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28358 unsafe {
28359 let or: i32x16 = _mm512_or_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, or, i32x16::ZERO))
28361 }
28362}
28363
28364/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst.
28365///
28366/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_or_epi32&expand=4039)
28367#[inline]
28368#[target_feature(enable = "avx512f,avx512vl")]
28369#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28370#[cfg_attr(test, assert_instr(vor))] //should be vpord
28371pub fn _mm256_or_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_or(a.as_i32x8(), b.as_i32x8())) }
28373}
28374
28375/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28376///
28377/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_or_epi32&expand=4037)
28378#[inline]
28379#[target_feature(enable = "avx512f,avx512vl")]
28380#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28381#[cfg_attr(test, assert_instr(vpord))]
28382pub fn _mm256_mask_or_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28383 unsafe {
28384 let or: i32x8 = _mm256_or_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, or, src.as_i32x8()))
28386 }
28387}
28388
28389/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28390///
28391/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_or_epi32&expand=4038)
28392#[inline]
28393#[target_feature(enable = "avx512f,avx512vl")]
28394#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28395#[cfg_attr(test, assert_instr(vpord))]
28396pub fn _mm256_maskz_or_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28397 unsafe {
28398 let or: i32x8 = _mm256_or_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, or, i32x8::ZERO))
28400 }
28401}
28402
28403/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst.
28404///
28405/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_or_epi32&expand=4036)
28406#[inline]
28407#[target_feature(enable = "avx512f,avx512vl")]
28408#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28409#[cfg_attr(test, assert_instr(vor))] //should be vpord
28410pub fn _mm_or_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_or(a.as_i32x4(), b.as_i32x4())) }
28412}
28413
28414/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28415///
28416/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_or_epi32&expand=4034)
28417#[inline]
28418#[target_feature(enable = "avx512f,avx512vl")]
28419#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28420#[cfg_attr(test, assert_instr(vpord))]
28421pub fn _mm_mask_or_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28422 unsafe {
28423 let or: i32x4 = _mm_or_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, or, src.as_i32x4()))
28425 }
28426}
28427
28428/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28429///
28430/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_or_epi32&expand=4035)
28431#[inline]
28432#[target_feature(enable = "avx512f,avx512vl")]
28433#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28434#[cfg_attr(test, assert_instr(vpord))]
28435pub fn _mm_maskz_or_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28436 unsafe {
28437 let or: i32x4 = _mm_or_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, or, i32x4::ZERO))
28439 }
28440}
28441
/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst.
28443///
28444/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_epi64&expand=4051)
28445#[inline]
28446#[target_feature(enable = "avx512f")]
28447#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28448#[cfg_attr(test, assert_instr(vporq))]
28449pub fn _mm512_or_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_or(a.as_i64x8(), b.as_i64x8())) }
28451}
28452
28453/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28454///
28455/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_or_epi64&expand=4049)
28456#[inline]
28457#[target_feature(enable = "avx512f")]
28458#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28459#[cfg_attr(test, assert_instr(vporq))]
28460pub fn _mm512_mask_or_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28461 unsafe {
28462 let or: i64x8 = _mm512_or_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, or, src.as_i64x8()))
28464 }
28465}
28466
28467/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28468///
28469/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_or_epi64&expand=4050)
28470#[inline]
28471#[target_feature(enable = "avx512f")]
28472#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28473#[cfg_attr(test, assert_instr(vporq))]
28474pub fn _mm512_maskz_or_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28475 unsafe {
28476 let or: i64x8 = _mm512_or_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, or, i64x8::ZERO))
28478 }
28479}
28480
/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst.
28482///
28483/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_or_epi64&expand=4048)
28484#[inline]
28485#[target_feature(enable = "avx512f,avx512vl")]
28486#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28487#[cfg_attr(test, assert_instr(vor))] //should be vporq
28488pub fn _mm256_or_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_or(a.as_i64x4(), b.as_i64x4())) }
28490}
28491
28492/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28493///
28494/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_or_epi64&expand=4046)
28495#[inline]
28496#[target_feature(enable = "avx512f,avx512vl")]
28497#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28498#[cfg_attr(test, assert_instr(vporq))]
28499pub fn _mm256_mask_or_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28500 unsafe {
28501 let or: i64x4 = _mm256_or_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, or, src.as_i64x4()))
28503 }
28504}
28505
28506/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28507///
28508/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_or_epi64&expand=4047)
28509#[inline]
28510#[target_feature(enable = "avx512f,avx512vl")]
28511#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28512#[cfg_attr(test, assert_instr(vporq))]
28513pub fn _mm256_maskz_or_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28514 unsafe {
28515 let or: i64x4 = _mm256_or_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, or, i64x4::ZERO))
28517 }
28518}
28519
/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst.
28521///
28522/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_or_epi64&expand=4045)
28523#[inline]
28524#[target_feature(enable = "avx512f,avx512vl")]
28525#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28526#[cfg_attr(test, assert_instr(vor))] //should be vporq
28527pub fn _mm_or_epi64(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_or(a.as_i64x2(), b.as_i64x2())) }
28529}
28530
28531/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28532///
28533/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_or_epi64&expand=4043)
28534#[inline]
28535#[target_feature(enable = "avx512f,avx512vl")]
28536#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28537#[cfg_attr(test, assert_instr(vporq))]
28538pub fn _mm_mask_or_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28539 unsafe {
28540 let or: i64x2 = _mm_or_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, or, src.as_i64x2()))
28542 }
28543}
28544
28545/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28546///
28547/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_or_epi64&expand=4044)
28548#[inline]
28549#[target_feature(enable = "avx512f,avx512vl")]
28550#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28551#[cfg_attr(test, assert_instr(vporq))]
28552pub fn _mm_maskz_or_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28553 unsafe {
28554 let or: i64x2 = _mm_or_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, or, i64x2::ZERO))
28556 }
28557}
28558
28559/// Compute the bitwise OR of 512 bits (representing integer data) in a and b, and store the result in dst.
28560///
28561/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_si512&expand=4072)
28562#[inline]
28563#[target_feature(enable = "avx512f")]
28564#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28565#[cfg_attr(test, assert_instr(vporq))]
28566pub fn _mm512_or_si512(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_or(a.as_i32x16(), b.as_i32x16())) }
28568}
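
// Illustrative sketch, not part of the original source: `_mm512_or_si512` treats its
// operands purely as 512 bits, so it produces the same bit pattern as the per-element
// `_mm512_or_epi32` variant. The helper name and values are ours; compiled only for tests
// and assumes AVX-512F at runtime.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_or_si512_is_bitwise() {
    unsafe {
        let a = _mm512_set1_epi32(0x0f0f_0f0f);
        let b = _mm512_set1_epi32(0x3030_3030);
        let whole: [i32; 16] = mem::transmute(_mm512_or_si512(a, b));
        let per_lane: [i32; 16] = mem::transmute(_mm512_or_epi32(a, b));
        assert_eq!(whole, per_lane);
        assert_eq!(whole[0], 0x3f3f_3f3f);
    }
}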
28569
28570/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst.
28571///
28572/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_epi32&expand=6142)
28573#[inline]
28574#[target_feature(enable = "avx512f")]
28575#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28576#[cfg_attr(test, assert_instr(vpxorq))] //should be vpxord
28577pub fn _mm512_xor_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_xor(a.as_i32x16(), b.as_i32x16())) }
28579}
28580
28581/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28582///
28583/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_xor_epi32&expand=6140)
28584#[inline]
28585#[target_feature(enable = "avx512f")]
28586#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28587#[cfg_attr(test, assert_instr(vpxord))]
28588pub fn _mm512_mask_xor_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28589 unsafe {
28590 let xor: i32x16 = _mm512_xor_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, xor, src.as_i32x16()))
28592 }
28593}
28594
28595/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28596///
28597/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_xor_epi32&expand=6141)
28598#[inline]
28599#[target_feature(enable = "avx512f")]
28600#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28601#[cfg_attr(test, assert_instr(vpxord))]
28602pub fn _mm512_maskz_xor_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28603 unsafe {
28604 let xor: i32x16 = _mm512_xor_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, xor, i32x16::ZERO))
28606 }
28607}
28608
28609/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst.
28610///
28611/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_xor_epi32&expand=6139)
28612#[inline]
28613#[target_feature(enable = "avx512f,avx512vl")]
28614#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28615#[cfg_attr(test, assert_instr(vxor))] //should be vpxord
28616pub fn _mm256_xor_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_xor(a.as_i32x8(), b.as_i32x8())) }
28618}
28619
28620/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28621///
28622/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_xor_epi32&expand=6137)
28623#[inline]
28624#[target_feature(enable = "avx512f,avx512vl")]
28625#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28626#[cfg_attr(test, assert_instr(vpxord))]
28627pub fn _mm256_mask_xor_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28628 unsafe {
28629 let xor: i32x8 = _mm256_xor_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, xor, src.as_i32x8()))
28631 }
28632}
28633
28634/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28635///
28636/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_xor_epi32&expand=6138)
28637#[inline]
28638#[target_feature(enable = "avx512f,avx512vl")]
28639#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28640#[cfg_attr(test, assert_instr(vpxord))]
28641pub fn _mm256_maskz_xor_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28642 unsafe {
28643 let xor: i32x8 = _mm256_xor_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, xor, i32x8::ZERO))
28645 }
28646}
28647
28648/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst.
28649///
28650/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_xor_epi32&expand=6136)
28651#[inline]
28652#[target_feature(enable = "avx512f,avx512vl")]
28653#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28654#[cfg_attr(test, assert_instr(vxor))] //should be vpxord
28655pub fn _mm_xor_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_xor(a.as_i32x4(), b.as_i32x4())) }
28657}
28658
28659/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28660///
28661/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_xor_epi32&expand=6134)
28662#[inline]
28663#[target_feature(enable = "avx512f,avx512vl")]
28664#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28665#[cfg_attr(test, assert_instr(vpxord))]
28666pub fn _mm_mask_xor_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28667 unsafe {
28668 let xor: i32x4 = _mm_xor_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, xor, src.as_i32x4()))
28670 }
28671}
28672
28673/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28674///
28675/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_xor_epi32&expand=6135)
28676#[inline]
28677#[target_feature(enable = "avx512f,avx512vl")]
28678#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28679#[cfg_attr(test, assert_instr(vpxord))]
28680pub fn _mm_maskz_xor_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28681 unsafe {
28682 let xor: i32x4 = _mm_xor_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, xor, i32x4::ZERO))
28684 }
28685}
28686
28687/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst.
28688///
28689/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_epi64&expand=6151)
28690#[inline]
28691#[target_feature(enable = "avx512f")]
28692#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28693#[cfg_attr(test, assert_instr(vpxorq))]
28694pub fn _mm512_xor_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_xor(a.as_i64x8(), b.as_i64x8())) }
28696}
28697
28698/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28699///
28700/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_xor_epi64&expand=6149)
28701#[inline]
28702#[target_feature(enable = "avx512f")]
28703#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28704#[cfg_attr(test, assert_instr(vpxorq))]
28705pub fn _mm512_mask_xor_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28706 unsafe {
28707 let xor: i64x8 = _mm512_xor_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, xor, src.as_i64x8()))
28709 }
28710}
28711
28712/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28713///
28714/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_xor_epi64&expand=6150)
28715#[inline]
28716#[target_feature(enable = "avx512f")]
28717#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28718#[cfg_attr(test, assert_instr(vpxorq))]
28719pub fn _mm512_maskz_xor_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28720 unsafe {
28721 let xor: i64x8 = _mm512_xor_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, xor, i64x8::ZERO))
28723 }
28724}
28725
28726/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst.
28727///
28728/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_xor_epi64&expand=6148)
28729#[inline]
28730#[target_feature(enable = "avx512f,avx512vl")]
28731#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28732#[cfg_attr(test, assert_instr(vxor))] //should be vpxorq
28733pub fn _mm256_xor_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_xor(a.as_i64x4(), b.as_i64x4())) }
28735}
28736
28737/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28738///
28739/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_xor_epi64&expand=6146)
28740#[inline]
28741#[target_feature(enable = "avx512f,avx512vl")]
28742#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28743#[cfg_attr(test, assert_instr(vpxorq))]
28744pub fn _mm256_mask_xor_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28745 unsafe {
28746 let xor: i64x4 = _mm256_xor_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, xor, src.as_i64x4()))
28748 }
28749}
28750
28751/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28752///
28753/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_xor_epi64&expand=6147)
28754#[inline]
28755#[target_feature(enable = "avx512f,avx512vl")]
28756#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28757#[cfg_attr(test, assert_instr(vpxorq))]
28758pub fn _mm256_maskz_xor_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28759 unsafe {
28760 let xor: i64x4 = _mm256_xor_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, xor, i64x4::ZERO))
28762 }
28763}
28764
28765/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst.
28766///
28767/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_xor_epi64&expand=6145)
28768#[inline]
28769#[target_feature(enable = "avx512f,avx512vl")]
28770#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28771#[cfg_attr(test, assert_instr(vxor))] //should be vpxorq
28772pub fn _mm_xor_epi64(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_xor(a.as_i64x2(), b.as_i64x2())) }
28774}
28775
28776/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28777///
28778/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_xor_epi64&expand=6143)
28779#[inline]
28780#[target_feature(enable = "avx512f,avx512vl")]
28781#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28782#[cfg_attr(test, assert_instr(vpxorq))]
28783pub fn _mm_mask_xor_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28784 unsafe {
28785 let xor: i64x2 = _mm_xor_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, xor, src.as_i64x2()))
28787 }
28788}
28789
28790/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28791///
28792/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_xor_epi64&expand=6144)
28793#[inline]
28794#[target_feature(enable = "avx512f,avx512vl")]
28795#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28796#[cfg_attr(test, assert_instr(vpxorq))]
28797pub fn _mm_maskz_xor_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28798 unsafe {
28799 let xor: i64x2 = _mm_xor_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, xor, i64x2::ZERO))
28801 }
28802}
28803
28804/// Compute the bitwise XOR of 512 bits (representing integer data) in a and b, and store the result in dst.
28805///
28806/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_si512&expand=6172)
28807#[inline]
28808#[target_feature(enable = "avx512f")]
28809#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28810#[cfg_attr(test, assert_instr(vpxorq))]
28811pub fn _mm512_xor_si512(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_xor(a.as_i32x16(), b.as_i32x16())) }
28813}
28814
28815/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst.
28816///
28817/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_epi32&expand=310)
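///
/// A small sketch (not part of Intel's documentation) of the NOT-then-AND
/// behavior, assuming a nightly toolchain with `stdarch_x86_avx512` and an
/// AVX-512F CPU:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi32(0b1100);
///     let b = _mm512_set1_epi32(0b1010);
///     // Every lane computes (!a) & b = 0b0010.
///     let r = _mm512_andnot_epi32(a, b);
///     assert_eq!(_mm512_cmpeq_epi32_mask(r, _mm512_set1_epi32(0b0010)), 0xffff);
/// }
/// ```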
28818#[inline]
28819#[target_feature(enable = "avx512f")]
28820#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28821#[cfg_attr(test, assert_instr(vpandnq))] //should be vpandnd
28822pub fn _mm512_andnot_epi32(a: __m512i, b: __m512i) -> __m512i {
    _mm512_and_epi32(_mm512_xor_epi32(a, _mm512_set1_epi32(u32::MAX as i32)), b)
28824}
28825
28826/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28827///
28828/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_andnot_epi32&expand=311)
28829#[inline]
28830#[target_feature(enable = "avx512f")]
28831#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28832#[cfg_attr(test, assert_instr(vpandnd))]
28833pub fn _mm512_mask_andnot_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28834 unsafe {
28835 let andnot: i32x16 = _mm512_andnot_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, andnot, src.as_i32x16()))
28837 }
28838}
28839
28840/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28841///
28842/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_andnot_epi32&expand=312)
28843#[inline]
28844#[target_feature(enable = "avx512f")]
28845#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28846#[cfg_attr(test, assert_instr(vpandnd))]
28847pub fn _mm512_maskz_andnot_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28848 unsafe {
28849 let andnot: i32x16 = _mm512_andnot_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, andnot, i32x16::ZERO))
28851 }
28852}
28853
28854/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28855///
28856/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_andnot_epi32&expand=308)
28857#[inline]
28858#[target_feature(enable = "avx512f,avx512vl")]
28859#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28860#[cfg_attr(test, assert_instr(vpandnd))]
28861pub fn _mm256_mask_andnot_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28862 unsafe {
        let not: __m256i = _mm256_xor_epi32(a, _mm256_set1_epi32(u32::MAX as i32));
        let andnot: i32x8 = simd_and(not.as_i32x8(), b.as_i32x8());
        transmute(simd_select_bitmask(k, andnot, src.as_i32x8()))
28866 }
28867}
28868
28869/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28870///
28871/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_andnot_epi32&expand=309)
28872#[inline]
28873#[target_feature(enable = "avx512f,avx512vl")]
28874#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28875#[cfg_attr(test, assert_instr(vpandnd))]
28876pub fn _mm256_maskz_andnot_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28877 unsafe {
        let not: __m256i = _mm256_xor_epi32(a, _mm256_set1_epi32(u32::MAX as i32));
        let andnot: i32x8 = simd_and(not.as_i32x8(), b.as_i32x8());
        transmute(simd_select_bitmask(k, andnot, i32x8::ZERO))
28881 }
28882}
28883
28884/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28885///
28886/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_andnot_epi32&expand=306)
28887#[inline]
28888#[target_feature(enable = "avx512f,avx512vl")]
28889#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28890#[cfg_attr(test, assert_instr(vpandnd))]
28891pub fn _mm_mask_andnot_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28892 unsafe {
        let not: __m128i = _mm_xor_epi32(a, _mm_set1_epi32(u32::MAX as i32));
        let andnot: i32x4 = simd_and(not.as_i32x4(), b.as_i32x4());
        transmute(simd_select_bitmask(k, andnot, src.as_i32x4()))
28896 }
28897}
28898
28899/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28900///
28901/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_andnot_epi32&expand=307)
28902#[inline]
28903#[target_feature(enable = "avx512f,avx512vl")]
28904#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28905#[cfg_attr(test, assert_instr(vpandnd))]
28906pub fn _mm_maskz_andnot_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28907 unsafe {
        let not: __m128i = _mm_xor_epi32(a, _mm_set1_epi32(u32::MAX as i32));
        let andnot: i32x4 = simd_and(not.as_i32x4(), b.as_i32x4());
        transmute(simd_select_bitmask(k, andnot, i32x4::ZERO))
28911 }
28912}
28913
28914/// Compute the bitwise NOT of 512 bits (composed of packed 64-bit integers) in a and then AND with b, and store the results in dst.
28915///
28916/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_epi64&expand=317)
28917#[inline]
28918#[target_feature(enable = "avx512f")]
28919#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpandnq))]
28921pub fn _mm512_andnot_epi64(a: __m512i, b: __m512i) -> __m512i {
    _mm512_and_epi64(_mm512_xor_epi64(a, _mm512_set1_epi64(u64::MAX as i64)), b)
28923}
28924
28925/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28926///
28927/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_andnot_epi64&expand=318)
28928#[inline]
28929#[target_feature(enable = "avx512f")]
28930#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28931#[cfg_attr(test, assert_instr(vpandnq))]
28932pub fn _mm512_mask_andnot_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28933 unsafe {
28934 let andnot: i64x8 = _mm512_andnot_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, andnot, src.as_i64x8()))
28936 }
28937}
28938
28939/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28940///
28941/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_andnot_epi64&expand=319)
28942#[inline]
28943#[target_feature(enable = "avx512f")]
28944#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28945#[cfg_attr(test, assert_instr(vpandnq))]
28946pub fn _mm512_maskz_andnot_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28947 unsafe {
28948 let andnot: i64x8 = _mm512_andnot_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, andnot, i64x8::ZERO))
28950 }
28951}
28952
28953/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28954///
28955/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_andnot_epi64&expand=315)
28956#[inline]
28957#[target_feature(enable = "avx512f,avx512vl")]
28958#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28959#[cfg_attr(test, assert_instr(vpandnq))]
28960pub fn _mm256_mask_andnot_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28961 unsafe {
        let not: __m256i = _mm256_xor_epi64(a, _mm256_set1_epi64x(u64::MAX as i64));
        let andnot: i64x4 = simd_and(not.as_i64x4(), b.as_i64x4());
        transmute(simd_select_bitmask(k, andnot, src.as_i64x4()))
28965 }
28966}
28967
28968/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28969///
28970/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_andnot_epi64&expand=316)
28971#[inline]
28972#[target_feature(enable = "avx512f,avx512vl")]
28973#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28974#[cfg_attr(test, assert_instr(vpandnq))]
28975pub fn _mm256_maskz_andnot_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28976 unsafe {
        let not: __m256i = _mm256_xor_epi64(a, _mm256_set1_epi64x(u64::MAX as i64));
        let andnot: i64x4 = simd_and(not.as_i64x4(), b.as_i64x4());
        transmute(simd_select_bitmask(k, andnot, i64x4::ZERO))
28980 }
28981}
28982
28983/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28984///
28985/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_andnot_epi64&expand=313)
28986#[inline]
28987#[target_feature(enable = "avx512f,avx512vl")]
28988#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28989#[cfg_attr(test, assert_instr(vpandnq))]
28990pub fn _mm_mask_andnot_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28991 unsafe {
        let not: __m128i = _mm_xor_epi64(a, _mm_set1_epi64x(u64::MAX as i64));
        let andnot: i64x2 = simd_and(not.as_i64x2(), b.as_i64x2());
        transmute(simd_select_bitmask(k, andnot, src.as_i64x2()))
28995 }
28996}
28997
28998/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28999///
29000/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_andnot_epi64&expand=314)
29001#[inline]
29002#[target_feature(enable = "avx512f,avx512vl")]
29003#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29004#[cfg_attr(test, assert_instr(vpandnq))]
29005pub fn _mm_maskz_andnot_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
29006 unsafe {
        let not: __m128i = _mm_xor_epi64(a, _mm_set1_epi64x(u64::MAX as i64));
        let andnot: i64x2 = simd_and(not.as_i64x2(), b.as_i64x2());
        transmute(simd_select_bitmask(k, andnot, i64x2::ZERO))
29010 }
29011}
29012
29013/// Compute the bitwise NOT of 512 bits (representing integer data) in a and then AND with b, and store the result in dst.
29014///
29015/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_si512&expand=340)
29016#[inline]
29017#[target_feature(enable = "avx512f")]
29018#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29019#[cfg_attr(test, assert_instr(vpandnq))]
29020pub fn _mm512_andnot_si512(a: __m512i, b: __m512i) -> __m512i {
    _mm512_and_epi64(_mm512_xor_epi64(a, _mm512_set1_epi64(u64::MAX as i64)), b)
29022}
29023
29024/// Convert 16-bit mask a into an integer value, and store the result in dst.
29025///
29026/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtmask16_u32)
29027#[inline]
29028#[target_feature(enable = "avx512f")]
29029#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29030pub fn _cvtmask16_u32(a: __mmask16) -> u32 {
29031 a as u32
29032}
29033
/// Convert 32-bit integer value a to a 16-bit mask and store the result in dst.
29035///
29036/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtu32_mask16)
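///
/// A round-trip sketch (not part of Intel's documentation); the conversion is a
/// plain truncation to 16 bits:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let k = _cvtu32_mask16(0x0001_00ff); // bits above 15 are discarded
///     assert_eq!(k, 0x00ff);
///     assert_eq!(_cvtmask16_u32(k), 0x00ff_u32);
/// }
/// ```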
29037#[inline]
29038#[target_feature(enable = "avx512f")]
29039#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29040pub fn _cvtu32_mask16(a: u32) -> __mmask16 {
29041 a as __mmask16
29042}
29043
29044/// Compute the bitwise AND of 16-bit masks a and b, and store the result in k.
29045///
29046/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kand_mask16&expand=3212)
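///
/// A small illustration (not part of Intel's documentation); the mask intrinsics
/// behave like ordinary 16-bit bitwise operations:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a: __mmask16 = 0b1111_0000_1111_0000;
///     let b: __mmask16 = 0b1010_1010_1010_1010;
///     assert_eq!(_kand_mask16(a, b), 0b1010_0000_1010_0000);
/// }
/// ```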
29047#[inline]
29048#[target_feature(enable = "avx512f")]
29049#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29050#[cfg_attr(test, assert_instr(and))] // generate normal and code instead of kandw
29051pub fn _kand_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
29052 a & b
29053}
29054
29055/// Compute the bitwise AND of 16-bit masks a and b, and store the result in k.
29056///
29057/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kand&expand=3210)
29058#[inline]
29059#[target_feature(enable = "avx512f")]
29060#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29061#[cfg_attr(test, assert_instr(and))] // generate normal and code instead of kandw
29062pub fn _mm512_kand(a: __mmask16, b: __mmask16) -> __mmask16 {
29063 a & b
29064}
29065
29066/// Compute the bitwise OR of 16-bit masks a and b, and store the result in k.
29067///
29068/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kor_mask16&expand=3239)
29069#[inline]
29070#[target_feature(enable = "avx512f")]
29071#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29072#[cfg_attr(test, assert_instr(or))] // generate normal or code instead of korw
29073pub fn _kor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
29074 a | b
29075}
29076
29077/// Compute the bitwise OR of 16-bit masks a and b, and store the result in k.
29078///
29079/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kor&expand=3237)
29080#[inline]
29081#[target_feature(enable = "avx512f")]
29082#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29083#[cfg_attr(test, assert_instr(or))] // generate normal or code instead of korw
29084pub fn _mm512_kor(a: __mmask16, b: __mmask16) -> __mmask16 {
29085 a | b
29086}
29087
29088/// Compute the bitwise XOR of 16-bit masks a and b, and store the result in k.
29089///
29090/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kxor_mask16&expand=3291)
29091#[inline]
29092#[target_feature(enable = "avx512f")]
29093#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29094#[cfg_attr(test, assert_instr(xor))] // generate normal xor code instead of kxorw
29095pub fn _kxor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
29096 a ^ b
29097}
29098
29099/// Compute the bitwise XOR of 16-bit masks a and b, and store the result in k.
29100///
29101/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kxor&expand=3289)
29102#[inline]
29103#[target_feature(enable = "avx512f")]
29104#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29105#[cfg_attr(test, assert_instr(xor))] // generate normal xor code instead of kxorw
29106pub fn _mm512_kxor(a: __mmask16, b: __mmask16) -> __mmask16 {
29107 a ^ b
29108}
29109
29110/// Compute the bitwise NOT of 16-bit mask a, and store the result in k.
29111///
29112/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=knot_mask16&expand=3233)
29113#[inline]
29114#[target_feature(enable = "avx512f")]
29115#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29116pub fn _knot_mask16(a: __mmask16) -> __mmask16 {
29117 a ^ 0b11111111_11111111
29118}
29119
29120/// Compute the bitwise NOT of 16-bit mask a, and store the result in k.
29121///
29122/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_knot&expand=3231)
29123#[inline]
29124#[target_feature(enable = "avx512f")]
29125#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29126pub fn _mm512_knot(a: __mmask16) -> __mmask16 {
29127 a ^ 0b11111111_11111111
29128}
29129
29130/// Compute the bitwise NOT of 16-bit masks a and then AND with b, and store the result in k.
29131///
29132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kandn_mask16&expand=3218)
29133#[inline]
29134#[target_feature(enable = "avx512f")]
29135#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29136#[cfg_attr(test, assert_instr(not))] // generate normal and, not code instead of kandnw
29137pub fn _kandn_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
    _mm512_kand(_mm512_knot(a), b)
29139}
29140
29141/// Compute the bitwise NOT of 16-bit masks a and then AND with b, and store the result in k.
29142///
29143/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kandn&expand=3216)
29144#[inline]
29145#[target_feature(enable = "avx512f")]
29146#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(not))] // generate normal not, and code instead of kandnw
29148pub fn _mm512_kandn(a: __mmask16, b: __mmask16) -> __mmask16 {
    _mm512_kand(_mm512_knot(a), b)
29150}
29151
29152/// Compute the bitwise XNOR of 16-bit masks a and b, and store the result in k.
29153///
29154/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kxnor_mask16&expand=3285)
29155#[inline]
29156#[target_feature(enable = "avx512f")]
29157#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29158#[cfg_attr(test, assert_instr(xor))] // generate normal xor, not code instead of kxnorw
29159pub fn _kxnor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
29160 _mm512_knot(_mm512_kxor(a, b))
29161}
29162
29163/// Compute the bitwise XNOR of 16-bit masks a and b, and store the result in k.
29164///
29165/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kxnor&expand=3283)
29166#[inline]
29167#[target_feature(enable = "avx512f")]
29168#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(xor))] // generate normal xor, not code instead of kxnorw
29170pub fn _mm512_kxnor(a: __mmask16, b: __mmask16) -> __mmask16 {
29171 _mm512_knot(_mm512_kxor(a, b))
29172}
29173
29174/// Compute the bitwise OR of 16-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
29175/// store 0 in dst. If the result is all ones, store 1 in all_ones, otherwise store 0 in all_ones.
29176///
29177/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortest_mask16_u8)
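///
/// A usage sketch (not part of Intel's documentation) showing both outputs:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let mut all_ones = 0u8;
///     // The OR of the two masks is 0xffff, so `all_ones` is set and the
///     // returned "all zeros" flag is 0.
///     let zf = _kortest_mask16_u8(0xff00, 0x00ff, &mut all_ones);
///     assert_eq!((zf, all_ones), (0, 1));
/// }
/// ```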
29178#[inline]
29179#[target_feature(enable = "avx512f")]
29180#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29181pub unsafe fn _kortest_mask16_u8(a: __mmask16, b: __mmask16, all_ones: *mut u8) -> u8 {
29182 let tmp: u16 = _kor_mask16(a, b);
29183 *all_ones = (tmp == 0xffff) as u8;
29184 (tmp == 0) as u8
29185}
29186
29187/// Compute the bitwise OR of 16-bit masks a and b. If the result is all ones, store 1 in dst, otherwise
29188/// store 0 in dst.
29189///
29190/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestc_mask16_u8)
29191#[inline]
29192#[target_feature(enable = "avx512f")]
29193#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29194pub fn _kortestc_mask16_u8(a: __mmask16, b: __mmask16) -> u8 {
29195 (_kor_mask16(a, b) == 0xffff) as u8
29196}
29197
29198/// Compute the bitwise OR of 16-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
29199/// store 0 in dst.
29200///
29201/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestz_mask16_u8)
29202#[inline]
29203#[target_feature(enable = "avx512f")]
29204#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29205pub fn _kortestz_mask16_u8(a: __mmask16, b: __mmask16) -> u8 {
29206 (_kor_mask16(a, b) == 0) as u8
29207}
29208
29209/// Shift 16-bit mask a left by count bits while shifting in zeros, and store the result in dst.
29210///
29211/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftli_mask16)
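///
/// A minimal sketch (not part of Intel's documentation); `COUNT` is a const
/// generic, so the shift amount must be known at compile time:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let k: __mmask16 = 0b0000_0000_0000_1111;
///     assert_eq!(_kshiftli_mask16::<4>(k), 0b0000_0000_1111_0000);
/// }
/// ```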
29212#[inline]
29213#[target_feature(enable = "avx512f")]
29214#[rustc_legacy_const_generics(1)]
29215#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29216pub fn _kshiftli_mask16<const COUNT: u32>(a: __mmask16) -> __mmask16 {
29217 a << COUNT
29218}
29219
29220/// Shift 16-bit mask a right by count bits while shifting in zeros, and store the result in dst.
29221///
29222/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftri_mask16)
29223#[inline]
29224#[target_feature(enable = "avx512f")]
29225#[rustc_legacy_const_generics(1)]
29226#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29227pub fn _kshiftri_mask16<const COUNT: u32>(a: __mmask16) -> __mmask16 {
29228 a >> COUNT
29229}
29230
/// Load 16-bit mask from memory.
29232///
29233/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_load_mask16)
29234#[inline]
29235#[target_feature(enable = "avx512f")]
29236#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29237pub unsafe fn _load_mask16(mem_addr: *const __mmask16) -> __mmask16 {
29238 *mem_addr
29239}
29240
/// Store 16-bit mask to memory.
29242///
29243/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_store_mask16)
29244#[inline]
29245#[target_feature(enable = "avx512f")]
29246#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29247pub unsafe fn _store_mask16(mem_addr: *mut __mmask16, a: __mmask16) {
29248 *mem_addr = a;
29249}
29250
29251/// Copy 16-bit mask a to k.
29252///
29253/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm512_kmov&expand=3228)
29254#[inline]
29255#[target_feature(enable = "avx512f")]
29256#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(mov))] // generate normal mov code instead of kmovw
29258pub fn _mm512_kmov(a: __mmask16) -> __mmask16 {
29259 a
29260}
29261
29262/// Converts integer mask into bitmask, storing the result in dst.
29263///
29264/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_int2mask&expand=3189)
29265#[inline]
29266#[target_feature(enable = "avx512f")] // generate normal and code instead of kmovw
29267#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29268pub fn _mm512_int2mask(mask: i32) -> __mmask16 {
29269 mask as u16
29270}
29271
/// Converts bit mask k1 into an integer value, storing the result in dst.
29273///
29274/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2int&expand=3544)
29275#[inline]
29276#[target_feature(enable = "avx512f")]
29277#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(mov))] // generate normal mov code instead of kmovw
29279pub fn _mm512_mask2int(k1: __mmask16) -> i32 {
29280 k1 as i32
29281}
29282
29283/// Unpack and interleave 8 bits from masks a and b, and store the 16-bit result in k.
29284///
29285/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kunpackb&expand=3280)
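///
/// A small sketch (not part of Intel's documentation): the low byte of `a`
/// becomes the high byte of the result and the low byte of `b` becomes the low
/// byte:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a: __mmask16 = 0xff12;
///     let b: __mmask16 = 0xff34;
///     assert_eq!(_mm512_kunpackb(a, b), 0x1234);
/// }
/// ```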
29286#[inline]
29287#[target_feature(enable = "avx512f")]
29288#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(mov))] // generate normal mov code instead of kunpckbw
29290pub fn _mm512_kunpackb(a: __mmask16, b: __mmask16) -> __mmask16 {
29291 ((a & 0xff) << 8) | (b & 0xff)
29292}
29293
29294/// Performs bitwise OR between k1 and k2, storing the result in dst. CF flag is set if dst consists of all 1's.
29295///
29296/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kortestc&expand=3247)
29297#[inline]
29298#[target_feature(enable = "avx512f")]
29299#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(cmp))] // generate normal cmp code instead of kortestw
29301pub fn _mm512_kortestc(a: __mmask16, b: __mmask16) -> i32 {
29302 let r: bool = (a | b) == 0b11111111_11111111;
29303 r as i32
29304}
29305
29306/// Performs bitwise OR between k1 and k2, storing the result in dst. ZF flag is set if dst is 0.
29307///
29308/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kortestz)
29309#[inline]
29310#[target_feature(enable = "avx512f")]
29311#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(xor))] // generate normal xor code instead of kortestw
29313pub fn _mm512_kortestz(a: __mmask16, b: __mmask16) -> i32 {
29314 let r: bool = (a | b) == 0;
29315 r as i32
29316}
29317
29318/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
29319///
29320/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_test_epi32_mask&expand=5890)
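///
/// A minimal sketch (not part of Intel's documentation); a lane's mask bit is
/// set exactly when `a AND b` is non-zero in that lane:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi32(0b0110);
///     let b = _mm512_setr_epi32(0b0001, 0b0010, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
///     // Only lane 1 overlaps with `a`, so only bit 1 of the mask is set.
///     assert_eq!(_mm512_test_epi32_mask(a, b), 0b10);
/// }
/// ```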
29321#[inline]
29322#[target_feature(enable = "avx512f")]
29323#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29324#[cfg_attr(test, assert_instr(vptestmd))]
29325pub fn _mm512_test_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
29326 let and: __m512i = _mm512_and_epi32(a, b);
29327 let zero: __m512i = _mm512_setzero_si512();
    _mm512_cmpneq_epi32_mask(and, zero)
29329}
29330
29331/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
29332///
29333/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_test_epi32_mask&expand=5889)
29334#[inline]
29335#[target_feature(enable = "avx512f")]
29336#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29337#[cfg_attr(test, assert_instr(vptestmd))]
29338pub fn _mm512_mask_test_epi32_mask(k: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
29339 let and: __m512i = _mm512_and_epi32(a, b);
29340 let zero: __m512i = _mm512_setzero_si512();
    _mm512_mask_cmpneq_epi32_mask(k, and, zero)
29342}
29343
29344/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
29345///
29346/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_test_epi32_mask&expand=5888)
29347#[inline]
29348#[target_feature(enable = "avx512f,avx512vl")]
29349#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29350#[cfg_attr(test, assert_instr(vptestmd))]
29351pub fn _mm256_test_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
29352 let and: __m256i = _mm256_and_si256(a, b);
29353 let zero: __m256i = _mm256_setzero_si256();
    _mm256_cmpneq_epi32_mask(and, zero)
29355}
29356
29357/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
29358///
29359/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_test_epi32_mask&expand=5887)
29360#[inline]
29361#[target_feature(enable = "avx512f,avx512vl")]
29362#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29363#[cfg_attr(test, assert_instr(vptestmd))]
29364pub fn _mm256_mask_test_epi32_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
29365 let and: __m256i = _mm256_and_si256(a, b);
29366 let zero: __m256i = _mm256_setzero_si256();
    _mm256_mask_cmpneq_epi32_mask(k, and, zero)
29368}
29369
29370/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
29371///
29372/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_epi32_mask&expand=5886)
29373#[inline]
29374#[target_feature(enable = "avx512f,avx512vl")]
29375#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29376#[cfg_attr(test, assert_instr(vptestmd))]
29377pub fn _mm_test_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
29378 let and: __m128i = _mm_and_si128(a, b);
29379 let zero: __m128i = _mm_setzero_si128();
    _mm_cmpneq_epi32_mask(and, zero)
29381}
29382
29383/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
29384///
29385/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_test_epi32_mask&expand=5885)
29386#[inline]
29387#[target_feature(enable = "avx512f,avx512vl")]
29388#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29389#[cfg_attr(test, assert_instr(vptestmd))]
29390pub fn _mm_mask_test_epi32_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
29391 let and: __m128i = _mm_and_si128(a, b);
29392 let zero: __m128i = _mm_setzero_si128();
    _mm_mask_cmpneq_epi32_mask(k, and, zero)
29394}
29395
29396/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
29397///
29398/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_test_epi64_mask&expand=5896)
29399#[inline]
29400#[target_feature(enable = "avx512f")]
29401#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29402#[cfg_attr(test, assert_instr(vptestmq))]
29403pub fn _mm512_test_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
29404 let and: __m512i = _mm512_and_epi64(a, b);
29405 let zero: __m512i = _mm512_setzero_si512();
    _mm512_cmpneq_epi64_mask(and, zero)
29407}
29408
29409/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
29410///
29411/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_test_epi64_mask&expand=5895)
29412#[inline]
29413#[target_feature(enable = "avx512f")]
29414#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29415#[cfg_attr(test, assert_instr(vptestmq))]
29416pub fn _mm512_mask_test_epi64_mask(k: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
29417 let and: __m512i = _mm512_and_epi64(a, b);
29418 let zero: __m512i = _mm512_setzero_si512();
    _mm512_mask_cmpneq_epi64_mask(k, and, zero)
29420}
29421
29422/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
29423///
29424/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_test_epi64_mask&expand=5894)
29425#[inline]
29426#[target_feature(enable = "avx512f,avx512vl")]
29427#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29428#[cfg_attr(test, assert_instr(vptestmq))]
29429pub fn _mm256_test_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
29430 let and: __m256i = _mm256_and_si256(a, b);
29431 let zero: __m256i = _mm256_setzero_si256();
    _mm256_cmpneq_epi64_mask(and, zero)
29433}
29434
29435/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
29436///
29437/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_test_epi64_mask&expand=5893)
29438#[inline]
29439#[target_feature(enable = "avx512f,avx512vl")]
29440#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29441#[cfg_attr(test, assert_instr(vptestmq))]
29442pub fn _mm256_mask_test_epi64_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
29443 let and: __m256i = _mm256_and_si256(a, b);
29444 let zero: __m256i = _mm256_setzero_si256();
    _mm256_mask_cmpneq_epi64_mask(k, and, zero)
29446}
29447
29448/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
29449///
29450/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_epi64_mask&expand=5892)
29451#[inline]
29452#[target_feature(enable = "avx512f,avx512vl")]
29453#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29454#[cfg_attr(test, assert_instr(vptestmq))]
29455pub fn _mm_test_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
29456 let and: __m128i = _mm_and_si128(a, b);
29457 let zero: __m128i = _mm_setzero_si128();
    _mm_cmpneq_epi64_mask(and, zero)
29459}
29460
29461/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
29462///
29463/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_test_epi64_mask&expand=5891)
29464#[inline]
29465#[target_feature(enable = "avx512f,avx512vl")]
29466#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29467#[cfg_attr(test, assert_instr(vptestmq))]
29468pub fn _mm_mask_test_epi64_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
29469 let and: __m128i = _mm_and_si128(a, b);
29470 let zero: __m128i = _mm_setzero_si128();
    _mm_mask_cmpneq_epi64_mask(k, and, zero)
29472}
29473
29474/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
29475///
29476/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_testn_epi32_mask&expand=5921)
29477#[inline]
29478#[target_feature(enable = "avx512f")]
29479#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29480#[cfg_attr(test, assert_instr(vptestnmd))]
29481pub fn _mm512_testn_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
29482 let and: __m512i = _mm512_and_epi32(a, b);
29483 let zero: __m512i = _mm512_setzero_si512();
    _mm512_cmpeq_epi32_mask(and, zero)
29485}
29486
29487/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
29488///
29489/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_testn_epi32_mask&expand=5920)
29490#[inline]
29491#[target_feature(enable = "avx512f")]
29492#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29493#[cfg_attr(test, assert_instr(vptestnmd))]
29494pub fn _mm512_mask_testn_epi32_mask(k: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
29495 let and: __m512i = _mm512_and_epi32(a, b);
29496 let zero: __m512i = _mm512_setzero_si512();
    _mm512_mask_cmpeq_epi32_mask(k, and, zero)
29498}
29499
29500/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
29501///
29502/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testn_epi32_mask&expand=5919)
29503#[inline]
29504#[target_feature(enable = "avx512f,avx512vl")]
29505#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29506#[cfg_attr(test, assert_instr(vptestnmd))]
29507pub fn _mm256_testn_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
29508 let and: __m256i = _mm256_and_si256(a, b);
29509 let zero: __m256i = _mm256_setzero_si256();
    _mm256_cmpeq_epi32_mask(and, zero)
29511}
29512
29513/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
29514///
29515/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_testn_epi32_mask&expand=5918)
29516#[inline]
29517#[target_feature(enable = "avx512f,avx512vl")]
29518#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29519#[cfg_attr(test, assert_instr(vptestnmd))]
29520pub fn _mm256_mask_testn_epi32_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
29521 let and: __m256i = _mm256_and_si256(a, b);
29522 let zero: __m256i = _mm256_setzero_si256();
    _mm256_mask_cmpeq_epi32_mask(k, and, zero)
29524}
29525
29526/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
29527///
29528/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testn_epi32_mask&expand=5917)
29529#[inline]
29530#[target_feature(enable = "avx512f,avx512vl")]
29531#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29532#[cfg_attr(test, assert_instr(vptestnmd))]
29533pub fn _mm_testn_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
29534 let and: __m128i = _mm_and_si128(a, b);
29535 let zero: __m128i = _mm_setzero_si128();
    _mm_cmpeq_epi32_mask(and, zero)
29537}
29538
29539/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
29540///
29541/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_testn_epi32_mask&expand=5916)
29542#[inline]
29543#[target_feature(enable = "avx512f,avx512vl")]
29544#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29545#[cfg_attr(test, assert_instr(vptestnmd))]
29546pub fn _mm_mask_testn_epi32_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
29547 let and: __m128i = _mm_and_si128(a, b);
29548 let zero: __m128i = _mm_setzero_si128();
    _mm_mask_cmpeq_epi32_mask(k, and, zero)
29550}
29551
29552/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
29553///
29554/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_testn_epi64_mask&expand=5927)
29555#[inline]
29556#[target_feature(enable = "avx512f")]
29557#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29558#[cfg_attr(test, assert_instr(vptestnmq))]
29559pub fn _mm512_testn_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
29560 let and: __m512i = _mm512_and_epi64(a, b);
29561 let zero: __m512i = _mm512_setzero_si512();
    _mm512_cmpeq_epi64_mask(and, zero)
29563}
29564
29565/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
29566///
29567/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_testn_epi64_mask&expand=5926)
29568#[inline]
29569#[target_feature(enable = "avx512f")]
29570#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29571#[cfg_attr(test, assert_instr(vptestnmq))]
29572pub fn _mm512_mask_testn_epi64_mask(k: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
29573 let and: __m512i = _mm512_and_epi64(a, b);
29574 let zero: __m512i = _mm512_setzero_si512();
    _mm512_mask_cmpeq_epi64_mask(k, and, zero)
29576}
29577
29578/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
29579///
29580/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testn_epi64_mask&expand=5925)
29581#[inline]
29582#[target_feature(enable = "avx512f,avx512vl")]
29583#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29584#[cfg_attr(test, assert_instr(vptestnmq))]
29585pub fn _mm256_testn_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
29586 let and: __m256i = _mm256_and_si256(a, b);
29587 let zero: __m256i = _mm256_setzero_si256();
    _mm256_cmpeq_epi64_mask(and, zero)
29589}
29590
29591/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
29592///
29593/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_testn_epi64_mask&expand=5924)
29594#[inline]
29595#[target_feature(enable = "avx512f,avx512vl")]
29596#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29597#[cfg_attr(test, assert_instr(vptestnmq))]
29598pub fn _mm256_mask_testn_epi64_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
29599 let and: __m256i = _mm256_and_si256(a, b);
29600 let zero: __m256i = _mm256_setzero_si256();
    _mm256_mask_cmpeq_epi64_mask(k, and, zero)
29602}
29603
29604/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
29605///
29606/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testn_epi64_mask&expand=5923)
29607#[inline]
29608#[target_feature(enable = "avx512f,avx512vl")]
29609#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29610#[cfg_attr(test, assert_instr(vptestnmq))]
29611pub fn _mm_testn_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
29612 let and: __m128i = _mm_and_si128(a, b);
29613 let zero: __m128i = _mm_setzero_si128();
    _mm_cmpeq_epi64_mask(and, zero)
29615}
29616
29617/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
29618///
29619/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_testn_epi64_mask&expand=5922)
29620#[inline]
29621#[target_feature(enable = "avx512f,avx512vl")]
29622#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29623#[cfg_attr(test, assert_instr(vptestnmq))]
29624pub fn _mm_mask_testn_epi64_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
29625 let and: __m128i = _mm_and_si128(a, b);
29626 let zero: __m128i = _mm_setzero_si128();
    _mm_mask_cmpeq_epi64_mask(k, and, zero)
29628}
29629
29630/// Store 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
29631///
29632/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_ps&expand=5671)
29633///
29634/// # Safety of non-temporal stores
29635///
29636/// After using this intrinsic, but before any other access to the memory that this intrinsic
29637/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
29638/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
29639/// return.
29640///
29641/// See [`_mm_sfence`] for details.
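///
/// A hedged usage sketch (not part of Intel's documentation) showing the store
/// followed by the required fence; it assumes a 64-byte-aligned buffer and an
/// AVX-512F CPU:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[repr(align(64))]
/// struct Aligned([f32; 16]);
///
/// unsafe {
///     let mut buf = Aligned([0.0; 16]);
///     let v = _mm512_set1_ps(1.0);
///     _mm512_stream_ps(buf.0.as_mut_ptr(), v);
///     // Make the non-temporal store visible before any other access.
///     _mm_sfence();
///     assert_eq!(buf.0, [1.0; 16]);
/// }
/// ```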
29642#[inline]
29643#[target_feature(enable = "avx512f")]
29644#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29645#[cfg_attr(test, assert_instr(vmovntps))]
29646#[allow(clippy::cast_ptr_alignment)]
29647pub unsafe fn _mm512_stream_ps(mem_addr: *mut f32, a: __m512) {
29648 crate::arch::asm!(
29649 vps!("vmovntps", ",{a}"),
29650 p = in(reg) mem_addr,
29651 a = in(zmm_reg) a,
29652 options(nostack, preserves_flags),
29653 );
29654}
29655
29656/// Store 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
29657///
29658/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_pd&expand=5667)
29659///
29660/// # Safety of non-temporal stores
29661///
29662/// After using this intrinsic, but before any other access to the memory that this intrinsic
29663/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
29664/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
29665/// return.
29666///
29667/// See [`_mm_sfence`] for details.
29668#[inline]
29669#[target_feature(enable = "avx512f")]
29670#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29671#[cfg_attr(test, assert_instr(vmovntpd))]
29672#[allow(clippy::cast_ptr_alignment)]
29673pub unsafe fn _mm512_stream_pd(mem_addr: *mut f64, a: __m512d) {
29674 crate::arch::asm!(
29675 vps!("vmovntpd", ",{a}"),
29676 p = in(reg) mem_addr,
29677 a = in(zmm_reg) a,
29678 options(nostack, preserves_flags),
29679 );
29680}
29681
29682/// Store 512-bits of integer data from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
29683///
29684/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_si512&expand=5675)
29685///
29686/// # Safety of non-temporal stores
29687///
29688/// After using this intrinsic, but before any other access to the memory that this intrinsic
29689/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
29690/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
29691/// return.
29692///
29693/// See [`_mm_sfence`] for details.
29694#[inline]
29695#[target_feature(enable = "avx512f")]
29696#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29697#[cfg_attr(test, assert_instr(vmovntdq))]
29698#[allow(clippy::cast_ptr_alignment)]
29699pub unsafe fn _mm512_stream_si512(mem_addr: *mut i32, a: __m512i) {
29700 crate::arch::asm!(
29701 vps!("vmovntdq", ",{a}"),
29702 p = in(reg) mem_addr,
29703 a = in(zmm_reg) a,
29704 options(nostack, preserves_flags),
29705 );
29706}
29707
29708/// Load 512-bits of integer data from memory into dst using a non-temporal memory hint. mem_addr
29709/// must be aligned on a 64-byte boundary or a general-protection exception may be generated. To
29710/// minimize caching, the data is flagged as non-temporal (unlikely to be used again soon)
29711///
29712/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_load_si512)
29713#[inline]
29714#[target_feature(enable = "avx512f")]
29715#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29716pub unsafe fn _mm512_stream_load_si512(mem_addr: *const __m512i) -> __m512i {
29717 let dst: __m512i;
29718 crate::arch::asm!(
29719 vpl!("vmovntdqa {a}"),
29720 a = out(zmm_reg) dst,
29721 p = in(reg) mem_addr,
29722 options(pure, readonly, nostack, preserves_flags),
29723 );
29724 dst
29725}
29726
/// Sets packed single-precision (32-bit) floating-point elements in `dst` with the supplied values.
29728///
29729/// [Intel's documentation]( https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_ps&expand=4931)
29730#[inline]
29731#[target_feature(enable = "avx512f")]
29732#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29733pub fn _mm512_set_ps(
29734 e0: f32,
29735 e1: f32,
29736 e2: f32,
29737 e3: f32,
29738 e4: f32,
29739 e5: f32,
29740 e6: f32,
29741 e7: f32,
29742 e8: f32,
29743 e9: f32,
29744 e10: f32,
29745 e11: f32,
29746 e12: f32,
29747 e13: f32,
29748 e14: f32,
29749 e15: f32,
29750) -> __m512 {
29751 _mm512_setr_ps(
        e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0,
29753 )
29754}
29755
/// Sets packed single-precision (32-bit) floating-point elements in `dst` with
/// the supplied values in reverse order.
29758///
29759/// [Intel's documentation]( https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_ps&expand=5008)
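///
/// A small sketch (not part of Intel's documentation) contrasting the argument
/// order of `_mm512_setr_ps` with `_mm512_set_ps`:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let r = _mm512_setr_ps(
///         0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
///     );
///     let mut out = [0.0f32; 16];
///     _mm512_storeu_ps(out.as_mut_ptr(), r);
///     // `setr` places its first argument in the lowest lane.
///     assert_eq!(out[0], 0.0);
///     assert_eq!(out[15], 15.0);
/// }
/// ```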
29760#[inline]
29761#[target_feature(enable = "avx512f")]
29762#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29763pub fn _mm512_setr_ps(
29764 e0: f32,
29765 e1: f32,
29766 e2: f32,
29767 e3: f32,
29768 e4: f32,
29769 e5: f32,
29770 e6: f32,
29771 e7: f32,
29772 e8: f32,
29773 e9: f32,
29774 e10: f32,
29775 e11: f32,
29776 e12: f32,
29777 e13: f32,
29778 e14: f32,
29779 e15: f32,
29780) -> __m512 {
29781 unsafe {
29782 let r: f32x16 = f32x16::new(
            e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
29784 );
        transmute(r)
29786 }
29787}
29788
29789/// Broadcast 64-bit float `a` to all elements of `dst`.
29790///
29791/// [Intel's documentation]( https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_pd&expand=4975)
29792#[inline]
29793#[target_feature(enable = "avx512f")]
29794#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29795pub fn _mm512_set1_pd(a: f64) -> __m512d {
    unsafe { transmute(f64x8::splat(a)) }
29797}
29798
29799/// Broadcast 32-bit float `a` to all elements of `dst`.
29800///
29801/// [Intel's documentation]( https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_ps&expand=4981)
29802#[inline]
29803#[target_feature(enable = "avx512f")]
29804#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29805pub fn _mm512_set1_ps(a: f32) -> __m512 {
    unsafe { transmute(f32x16::splat(a)) }
29807}
29808
29809/// Sets packed 32-bit integers in `dst` with the supplied values.
29810///
29811/// [Intel's documentation]( https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi32&expand=4908)
29812#[inline]
29813#[target_feature(enable = "avx512f")]
29814#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29815pub fn _mm512_set_epi32(
29816 e15: i32,
29817 e14: i32,
29818 e13: i32,
29819 e12: i32,
29820 e11: i32,
29821 e10: i32,
29822 e9: i32,
29823 e8: i32,
29824 e7: i32,
29825 e6: i32,
29826 e5: i32,
29827 e4: i32,
29828 e3: i32,
29829 e2: i32,
29830 e1: i32,
29831 e0: i32,
29832) -> __m512i {
29833 _mm512_setr_epi32(
        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
29835 )
29836}
29837
29838/// Broadcast 8-bit integer a to all elements of dst.
29839///
29840/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_epi8&expand=4972)
29841#[inline]
29842#[target_feature(enable = "avx512f")]
29843#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29844pub fn _mm512_set1_epi8(a: i8) -> __m512i {
    unsafe { transmute(i8x64::splat(a)) }
29846}
29847
/// Broadcast 16-bit integer a to all elements of dst.
29849///
29850/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_epi16&expand=4944)
29851#[inline]
29852#[target_feature(enable = "avx512f")]
29853#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29854pub fn _mm512_set1_epi16(a: i16) -> __m512i {
    unsafe { transmute(i16x32::splat(a)) }
29856}
29857
29858/// Broadcast 32-bit integer `a` to all elements of `dst`.
29859///
29860/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set1_epi32)
29861#[inline]
29862#[target_feature(enable = "avx512f")]
29863#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29864pub fn _mm512_set1_epi32(a: i32) -> __m512i {
    unsafe { transmute(i32x16::splat(a)) }
29866}
29867
29868/// Broadcast 32-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29869///
29870/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_set1_epi32&expand=4951)
29871#[inline]
29872#[target_feature(enable = "avx512f")]
29873#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29874#[cfg_attr(test, assert_instr(vpbroadcastd))]
29875pub fn _mm512_mask_set1_epi32(src: __m512i, k: __mmask16, a: i32) -> __m512i {
29876 unsafe {
29877 let r: i32x16 = _mm512_set1_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, r, src.as_i32x16()))
29879 }
29880}
29881
29882/// Broadcast 32-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29883///
29884/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_set1_epi32&expand=4952)
29885#[inline]
29886#[target_feature(enable = "avx512f")]
29887#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29888#[cfg_attr(test, assert_instr(vpbroadcastd))]
29889pub fn _mm512_maskz_set1_epi32(k: __mmask16, a: i32) -> __m512i {
29890 unsafe {
29891 let r: i32x16 = _mm512_set1_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
29893 }
29894}
29895
29896/// Broadcast 32-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29897///
29898/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_set1_epi32&expand=4948)
29899#[inline]
29900#[target_feature(enable = "avx512f,avx512vl")]
29901#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29902#[cfg_attr(test, assert_instr(vpbroadcastd))]
29903pub fn _mm256_mask_set1_epi32(src: __m256i, k: __mmask8, a: i32) -> __m256i {
29904 unsafe {
29905 let r: i32x8 = _mm256_set1_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, r, src.as_i32x8()))
29907 }
29908}
29909
29910/// Broadcast 32-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29911///
29912/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_set1_epi32&expand=4949)
29913#[inline]
29914#[target_feature(enable = "avx512f,avx512vl")]
29915#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29916#[cfg_attr(test, assert_instr(vpbroadcastd))]
29917pub fn _mm256_maskz_set1_epi32(k: __mmask8, a: i32) -> __m256i {
29918 unsafe {
29919 let r: i32x8 = _mm256_set1_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
29921 }
29922}
29923
29924/// Broadcast 32-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29925///
29926/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_set1_epi32&expand=4945)
29927#[inline]
29928#[target_feature(enable = "avx512f,avx512vl")]
29929#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29930#[cfg_attr(test, assert_instr(vpbroadcastd))]
29931pub fn _mm_mask_set1_epi32(src: __m128i, k: __mmask8, a: i32) -> __m128i {
29932 unsafe {
29933 let r: i32x4 = _mm_set1_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, r, src.as_i32x4()))
29935 }
29936}
29937
29938/// Broadcast 32-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29939///
29940/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_set1_epi32&expand=4946)
29941#[inline]
29942#[target_feature(enable = "avx512f,avx512vl")]
29943#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29944#[cfg_attr(test, assert_instr(vpbroadcastd))]
29945pub fn _mm_maskz_set1_epi32(k: __mmask8, a: i32) -> __m128i {
29946 unsafe {
29947 let r: i32x4 = _mm_set1_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
29949 }
29950}
29951
29952/// Broadcast 64-bit integer `a` to all elements of `dst`.
29953///
29954/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_epi64&expand=4961)
29955#[inline]
29956#[target_feature(enable = "avx512f")]
29957#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29958pub fn _mm512_set1_epi64(a: i64) -> __m512i {
    unsafe { transmute(i64x8::splat(a)) }
29960}
29961
29962/// Broadcast 64-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29963///
29964/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_set1_epi64&expand=4959)
29965#[inline]
29966#[target_feature(enable = "avx512f")]
29967#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29968#[cfg_attr(test, assert_instr(vpbroadcastq))]
29969pub fn _mm512_mask_set1_epi64(src: __m512i, k: __mmask8, a: i64) -> __m512i {
29970 unsafe {
29971 let r: i64x8 = _mm512_set1_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, r, src.as_i64x8()))
29973 }
29974}
29975
29976/// Broadcast 64-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29977///
29978/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_set1_epi64&expand=4960)
29979#[inline]
29980#[target_feature(enable = "avx512f")]
29981#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29982#[cfg_attr(test, assert_instr(vpbroadcastq))]
29983pub fn _mm512_maskz_set1_epi64(k: __mmask8, a: i64) -> __m512i {
29984 unsafe {
29985 let r: i64x8 = _mm512_set1_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, r, i64x8::ZERO))
29987 }
29988}
29989
29990/// Broadcast 64-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29991///
29992/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_set1_epi64&expand=4957)
29993#[inline]
29994#[target_feature(enable = "avx512f,avx512vl")]
29995#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29996#[cfg_attr(test, assert_instr(vpbroadcastq))]
29997pub fn _mm256_mask_set1_epi64(src: __m256i, k: __mmask8, a: i64) -> __m256i {
29998 unsafe {
29999 let r: i64x4 = _mm256_set1_epi64x(a).as_i64x4();
        transmute(simd_select_bitmask(k, r, src.as_i64x4()))
30001 }
30002}
30003
30004/// Broadcast 64-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
30005///
30006/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_set1_epi64&expand=4958)
30007#[inline]
30008#[target_feature(enable = "avx512f,avx512vl")]
30009#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30010#[cfg_attr(test, assert_instr(vpbroadcastq))]
30011pub fn _mm256_maskz_set1_epi64(k: __mmask8, a: i64) -> __m256i {
30012 unsafe {
30013 let r: i64x4 = _mm256_set1_epi64x(a).as_i64x4();
        transmute(simd_select_bitmask(k, r, i64x4::ZERO))
30015 }
30016}
30017
30018/// Broadcast 64-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
30019///
30020/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_set1_epi64&expand=4954)
30021#[inline]
30022#[target_feature(enable = "avx512f,avx512vl")]
30023#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30024#[cfg_attr(test, assert_instr(vpbroadcastq))]
30025pub fn _mm_mask_set1_epi64(src: __m128i, k: __mmask8, a: i64) -> __m128i {
30026 unsafe {
30027 let r: i64x2 = _mm_set1_epi64x(a).as_i64x2();
        transmute(simd_select_bitmask(k, r, src.as_i64x2()))
30029 }
30030}
30031
30032/// Broadcast 64-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
30033///
30034/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_set1_epi64&expand=4955)
30035#[inline]
30036#[target_feature(enable = "avx512f,avx512vl")]
30037#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30038#[cfg_attr(test, assert_instr(vpbroadcastq))]
30039pub fn _mm_maskz_set1_epi64(k: __mmask8, a: i64) -> __m128i {
30040 unsafe {
30041 let r: i64x2 = _mm_set1_epi64x(a).as_i64x2();
        transmute(simd_select_bitmask(k, r, i64x2::ZERO))
30043 }
30044}
30045
30046/// Set packed 64-bit integers in dst with the repeated 4 element sequence.
30047///
30048/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_epi64&expand=4983)
30049#[inline]
30050#[target_feature(enable = "avx512f")]
30051#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30052pub fn _mm512_set4_epi64(d: i64, c: i64, b: i64, a: i64) -> __m512i {
    _mm512_set_epi64(d, c, b, a, d, c, b, a)
30054}
30055
30056/// Set packed 64-bit integers in dst with the repeated 4 element sequence in reverse order.
30057///
30058/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_epi64&expand=5010)
30059#[inline]
30060#[target_feature(enable = "avx512f")]
30061#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30062pub fn _mm512_setr4_epi64(d: i64, c: i64, b: i64, a: i64) -> __m512i {
    _mm512_set_epi64(a, b, c, d, a, b, c, d)
30064}
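
// Illustrative note (not from Intel's docs): the two helpers above differ only in lane order.
// `_mm512_set4_epi64(d, c, b, a)` places `a` in the lowest lane, while `_mm512_setr4_epi64`
// starts the repeated sequence with `d`:
//
//     let v = _mm512_set4_epi64(40, 30, 20, 10);
//     // lanes (low to high): 10, 20, 30, 40, 10, 20, 30, 40
//     let w = _mm512_setr4_epi64(40, 30, 20, 10);
//     // lanes (low to high): 40, 30, 20, 10, 40, 30, 20, 10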
30065
30066/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than, and store the results in mask vector k.
30067///
30068/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_ps_mask&expand=1074)
30069#[inline]
30070#[target_feature(enable = "avx512f")]
30071#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30072#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30073pub fn _mm512_cmplt_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30074 _mm512_cmp_ps_mask::<_CMP_LT_OS>(a, b)
30075}
30076
30077/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30078///
30079/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_ps_mask&expand=1075)
30080#[inline]
30081#[target_feature(enable = "avx512f")]
30082#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30083#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30084pub fn _mm512_mask_cmplt_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30085 _mm512_mask_cmp_ps_mask::<_CMP_LT_OS>(k1, a, b)
30086}
30087
30088/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k.
30089///
30090/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnlt_ps_mask&expand=1154)
30091#[inline]
30092#[target_feature(enable = "avx512f")]
30093#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30094#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30095pub fn _mm512_cmpnlt_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30096 _mm512_cmp_ps_mask::<_CMP_NLT_US>(a, b)
30097}
30098
30099/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30100///
30101/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnlt_ps_mask&expand=1155)
30102#[inline]
30103#[target_feature(enable = "avx512f")]
30104#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30105#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30106pub fn _mm512_mask_cmpnlt_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30107 _mm512_mask_cmp_ps_mask::<_CMP_NLT_US>(k1, a, b)
30108}
30109
30110/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k.
30111///
30112/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_ps_mask&expand=1013)
30113#[inline]
30114#[target_feature(enable = "avx512f")]
30115#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30116#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30117pub fn _mm512_cmple_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30118 _mm512_cmp_ps_mask::<_CMP_LE_OS>(a, b)
30119}
30120
30121/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30122///
30123/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_ps_mask&expand=1014)
30124#[inline]
30125#[target_feature(enable = "avx512f")]
30126#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30127#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30128pub fn _mm512_mask_cmple_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30129 _mm512_mask_cmp_ps_mask::<_CMP_LE_OS>(k1, a, b)
30130}
30131
30132/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k.
30133///
30134/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnle_ps_mask&expand=1146)
30135#[inline]
30136#[target_feature(enable = "avx512f")]
30137#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30138#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30139pub fn _mm512_cmpnle_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30140 _mm512_cmp_ps_mask::<_CMP_NLE_US>(a, b)
30141}
30142
30143/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30144///
30145/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnle_ps_mask&expand=1147)
30146#[inline]
30147#[target_feature(enable = "avx512f")]
30148#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30149#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30150pub fn _mm512_mask_cmpnle_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30151 _mm512_mask_cmp_ps_mask::<_CMP_NLE_US>(k1, a, b)
30152}
30153
30154/// Compare packed single-precision (32-bit) floating-point elements in a and b for equality, and store the results in mask vector k.
30155///
30156/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_ps_mask&expand=828)
30157#[inline]
30158#[target_feature(enable = "avx512f")]
30159#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30160#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30161pub fn _mm512_cmpeq_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30162 _mm512_cmp_ps_mask::<_CMP_EQ_OQ>(a, b)
30163}
30164
30165/// Compare packed single-precision (32-bit) floating-point elements in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30166///
30167/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_ps_mask&expand=829)
30168#[inline]
30169#[target_feature(enable = "avx512f")]
30170#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30171#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30172pub fn _mm512_mask_cmpeq_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30173 _mm512_mask_cmp_ps_mask::<_CMP_EQ_OQ>(k1, a, b)
30174}
30175
30176/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k.
30177///
30178/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_ps_mask&expand=1130)
30179#[inline]
30180#[target_feature(enable = "avx512f")]
30181#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30182#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30183pub fn _mm512_cmpneq_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30184 _mm512_cmp_ps_mask::<_CMP_NEQ_UQ>(a, b)
30185}
30186
30187/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30188///
30189/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_ps_mask&expand=1131)
30190#[inline]
30191#[target_feature(enable = "avx512f")]
30192#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30193#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30194pub fn _mm512_mask_cmpneq_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30195 _mm512_mask_cmp_ps_mask::<_CMP_NEQ_UQ>(k1, a, b)
30196}
30197
30198/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
30199///
30200/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_ps_mask&expand=749)
30201#[inline]
30202#[target_feature(enable = "avx512f")]
30203#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30204#[rustc_legacy_const_generics(2)]
30205#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30206pub fn _mm512_cmp_ps_mask<const IMM8: i32>(a: __m512, b: __m512) -> __mmask16 {
30207 unsafe {
30208 static_assert_uimm_bits!(IMM8, 5);
30209 let neg_one: i16 = -1;
30210 let a: f32x16 = a.as_f32x16();
30211 let b: f32x16 = b.as_f32x16();
        let r: i16 = vcmpps(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION);
30213 r.cast_unsigned()
30214 }
30215}
30216
30217/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30218///
30219/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_ps_mask&expand=750)
30220#[inline]
30221#[target_feature(enable = "avx512f")]
30222#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30223#[rustc_legacy_const_generics(3)]
30224#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30225pub fn _mm512_mask_cmp_ps_mask<const IMM8: i32>(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30226 unsafe {
30227 static_assert_uimm_bits!(IMM8, 5);
30228 let a: f32x16 = a.as_f32x16();
30229 let b: f32x16 = b.as_f32x16();
        let r: i16 = vcmpps(a, b, IMM8, k1 as i16, _MM_FROUND_CUR_DIRECTION);
30231 r.cast_unsigned()
30232 }
30233}
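
// Illustrative sketch (not from Intel's docs): the named comparison wrappers above are thin
// aliases over `_mm512_cmp_ps_mask` with a fixed predicate constant, so the following pairs
// produce identical masks; `a` and `b` stand for any `__m512` values and AVX-512F is assumed:
//
//     let lt1 = _mm512_cmplt_ps_mask(a, b);
//     let lt2 = _mm512_cmp_ps_mask::<_CMP_LT_OS>(a, b);
//     assert_eq!(lt1, lt2);
//
//     // The masked form ANDs the comparison result with `k1`.
//     let k1: __mmask16 = 0x0F0F;
//     let lt3 = _mm512_mask_cmplt_ps_mask(k1, a, b);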
30234
30235/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
30236///
30237/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_ps_mask&expand=747)
30238#[inline]
30239#[target_feature(enable = "avx512f,avx512vl")]
30240#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30241#[rustc_legacy_const_generics(2)]
30242#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30243pub fn _mm256_cmp_ps_mask<const IMM8: i32>(a: __m256, b: __m256) -> __mmask8 {
30244 unsafe {
30245 static_assert_uimm_bits!(IMM8, 5);
30246 let neg_one: i8 = -1;
30247 let a: f32x8 = a.as_f32x8();
30248 let b: f32x8 = b.as_f32x8();
        let r: i8 = vcmpps256(a, b, IMM8, neg_one);
30250 r.cast_unsigned()
30251 }
30252}
30253
30254/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30255///
30256/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_ps_mask&expand=748)
30257#[inline]
30258#[target_feature(enable = "avx512f,avx512vl")]
30259#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30260#[rustc_legacy_const_generics(3)]
30261#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30262pub fn _mm256_mask_cmp_ps_mask<const IMM8: i32>(k1: __mmask8, a: __m256, b: __m256) -> __mmask8 {
30263 unsafe {
30264 static_assert_uimm_bits!(IMM8, 5);
30265 let a: f32x8 = a.as_f32x8();
30266 let b: f32x8 = b.as_f32x8();
        let r: i8 = vcmpps256(a, b, IMM8, k1 as i8);
30268 r.cast_unsigned()
30269 }
30270}
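
// Illustrative note (not from Intel's docs): the 256-bit and 128-bit comparison-mask forms
// require AVX-512VL in addition to AVX-512F. A hedged sketch of a runtime-dispatch call site,
// assuming `a` and `b` are `__m256` values:
//
//     if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512vl") {
//         let k = unsafe { _mm256_cmp_ps_mask::<_CMP_LE_OS>(a, b) };
//     }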
30271
30272/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
30273///
30274/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_ps_mask&expand=745)
30275#[inline]
30276#[target_feature(enable = "avx512f,avx512vl")]
30277#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30278#[rustc_legacy_const_generics(2)]
30279#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30280pub fn _mm_cmp_ps_mask<const IMM8: i32>(a: __m128, b: __m128) -> __mmask8 {
30281 unsafe {
30282 static_assert_uimm_bits!(IMM8, 5);
30283 let neg_one: i8 = -1;
30284 let a: f32x4 = a.as_f32x4();
30285 let b: f32x4 = b.as_f32x4();
        let r: i8 = vcmpps128(a, b, IMM8, neg_one);
30287 r.cast_unsigned()
30288 }
30289}
30290
30291/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30292///
30293/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_ps_mask&expand=746)
30294#[inline]
30295#[target_feature(enable = "avx512f,avx512vl")]
30296#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30297#[rustc_legacy_const_generics(3)]
30298#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30299pub fn _mm_mask_cmp_ps_mask<const IMM8: i32>(k1: __mmask8, a: __m128, b: __m128) -> __mmask8 {
30300 unsafe {
30301 static_assert_uimm_bits!(IMM8, 5);
30302 let a: f32x4 = a.as_f32x4();
30303 let b: f32x4 = b.as_f32x4();
        let r: i8 = vcmpps128(a, b, IMM8, k1 as i8);
30305 r.cast_unsigned()
30306 }
30307}
30308
30309/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.\
30310/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30311///
30312/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_round_ps_mask&expand=753)
30313#[inline]
30314#[target_feature(enable = "avx512f")]
30315#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30316#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30317#[rustc_legacy_const_generics(2, 3)]
30318pub fn _mm512_cmp_round_ps_mask<const IMM5: i32, const SAE: i32>(
30319 a: __m512,
30320 b: __m512,
30321) -> __mmask16 {
30322 unsafe {
30323 static_assert_uimm_bits!(IMM5, 5);
30324 static_assert_mantissas_sae!(SAE);
30325 let neg_one: i16 = -1;
30326 let a: f32x16 = a.as_f32x16();
30327 let b: f32x16 = b.as_f32x16();
        let r: i16 = vcmpps(a, b, IMM5, neg_one, SAE);
30329 r.cast_unsigned()
30330 }
30331}
30332
30333/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).\
30334/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30335///
30336/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_round_ps_mask&expand=754)
30337#[inline]
30338#[target_feature(enable = "avx512f")]
30339#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30340#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30341#[rustc_legacy_const_generics(3, 4)]
30342pub fn _mm512_mask_cmp_round_ps_mask<const IMM5: i32, const SAE: i32>(
30343 m: __mmask16,
30344 a: __m512,
30345 b: __m512,
30346) -> __mmask16 {
30347 unsafe {
30348 static_assert_uimm_bits!(IMM5, 5);
30349 static_assert_mantissas_sae!(SAE);
30350 let a: f32x16 = a.as_f32x16();
30351 let b: f32x16 = b.as_f32x16();
30352 let r: i16 = vcmpps(a, b, IMM5, m as i16, SAE);
30353 r.cast_unsigned()
30354 }
30355}
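
// Illustrative sketch (not from Intel's docs): the `_round_` variants only add exception
// suppression; the SAE parameter is expected to be either `_MM_FROUND_CUR_DIRECTION` (default
// behaviour) or `_MM_FROUND_NO_EXC` (suppress floating-point exceptions):
//
//     let quiet = _mm512_cmp_round_ps_mask::<_CMP_LT_OS, _MM_FROUND_NO_EXC>(a, b);
//     let noisy = _mm512_cmp_round_ps_mask::<_CMP_LT_OS, _MM_FROUND_CUR_DIRECTION>(a, b);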
30356
30357/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k.
30358///
30359/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpord_ps_mask&expand=1162)
30360#[inline]
30361#[target_feature(enable = "avx512f")]
30362#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30364pub fn _mm512_cmpord_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30365 _mm512_cmp_ps_mask::<_CMP_ORD_Q>(a, b)
30366}
30367
30368/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30369///
30370/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpord_ps_mask&expand=1163)
30371#[inline]
30372#[target_feature(enable = "avx512f")]
30373#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30374#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30375pub fn _mm512_mask_cmpord_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30376 _mm512_mask_cmp_ps_mask::<_CMP_ORD_Q>(k1, a, b)
30377}
30378
30379/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k.
30380///
30381/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpunord_ps_mask&expand=1170)
30382#[inline]
30383#[target_feature(enable = "avx512f")]
30384#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30385#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30386pub fn _mm512_cmpunord_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30387 _mm512_cmp_ps_mask::<_CMP_UNORD_Q>(a, b)
30388}
30389
30390/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30391///
30392/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpunord_ps_mask&expand=1171)
30393#[inline]
30394#[target_feature(enable = "avx512f")]
30395#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30396#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30397pub fn _mm512_mask_cmpunord_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30398 _mm512_mask_cmp_ps_mask::<_CMP_UNORD_Q>(k1, a, b)
30399}
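
// Illustrative note (not from Intel's docs): `cmpord` and `cmpunord` partition lanes by
// NaN-ness, so for any pair of inputs the two masks are bitwise complements of each other
// (`a` and `b` stand for any `__m512` values):
//
//     let ord = _mm512_cmpord_ps_mask(a, b);
//     let unord = _mm512_cmpunord_ps_mask(a, b);
//     assert_eq!(ord ^ unord, 0xFFFF);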
30400
30401/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than, and store the results in mask vector k.
30402///
30403/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_pd_mask&expand=1071)
30404#[inline]
30405#[target_feature(enable = "avx512f")]
30406#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30407#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30408pub fn _mm512_cmplt_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30409 _mm512_cmp_pd_mask::<_CMP_LT_OS>(a, b)
30410}
30411
30412/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30413///
30414/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_pd_mask&expand=1072)
30415#[inline]
30416#[target_feature(enable = "avx512f")]
30417#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30418#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30419pub fn _mm512_mask_cmplt_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30420 _mm512_mask_cmp_pd_mask::<_CMP_LT_OS>(k1, a, b)
30421}
30422
30423/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k.
30424///
30425/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnlt_pd_mask&expand=1151)
30426#[inline]
30427#[target_feature(enable = "avx512f")]
30428#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30429#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30430pub fn _mm512_cmpnlt_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30431 _mm512_cmp_pd_mask::<_CMP_NLT_US>(a, b)
30432}
30433
30434/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30435///
30436/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnlt_pd_mask&expand=1152)
30437#[inline]
30438#[target_feature(enable = "avx512f")]
30439#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30440#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30441pub fn _mm512_mask_cmpnlt_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
    _mm512_mask_cmp_pd_mask::<_CMP_NLT_US>(m, a, b)
30443}
30444
30445/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k.
30446///
30447/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_pd_mask&expand=1010)
30448#[inline]
30449#[target_feature(enable = "avx512f")]
30450#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30451#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30452pub fn _mm512_cmple_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30453 _mm512_cmp_pd_mask::<_CMP_LE_OS>(a, b)
30454}
30455
30456/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30457///
30458/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_pd_mask&expand=1011)
30459#[inline]
30460#[target_feature(enable = "avx512f")]
30461#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30462#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30463pub fn _mm512_mask_cmple_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30464 _mm512_mask_cmp_pd_mask::<_CMP_LE_OS>(k1, a, b)
30465}
30466
30467/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k.
30468///
30469/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnle_pd_mask&expand=1143)
30470#[inline]
30471#[target_feature(enable = "avx512f")]
30472#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30473#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30474pub fn _mm512_cmpnle_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30475 _mm512_cmp_pd_mask::<_CMP_NLE_US>(a, b)
30476}
30477
30478/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30479///
30480/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnle_pd_mask&expand=1144)
30481#[inline]
30482#[target_feature(enable = "avx512f")]
30483#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30484#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30485pub fn _mm512_mask_cmpnle_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30486 _mm512_mask_cmp_pd_mask::<_CMP_NLE_US>(k1, a, b)
30487}
30488
30489/// Compare packed double-precision (64-bit) floating-point elements in a and b for equality, and store the results in mask vector k.
30490///
30491/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_pd_mask&expand=822)
30492#[inline]
30493#[target_feature(enable = "avx512f")]
30494#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30495#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30496pub fn _mm512_cmpeq_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30497 _mm512_cmp_pd_mask::<_CMP_EQ_OQ>(a, b)
30498}
30499
30500/// Compare packed double-precision (64-bit) floating-point elements in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30501///
30502/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_pd_mask&expand=823)
30503#[inline]
30504#[target_feature(enable = "avx512f")]
30505#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30506#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30507pub fn _mm512_mask_cmpeq_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30508 _mm512_mask_cmp_pd_mask::<_CMP_EQ_OQ>(k1, a, b)
30509}
30510
30511/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k.
30512///
30513/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_pd_mask&expand=1127)
30514#[inline]
30515#[target_feature(enable = "avx512f")]
30516#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30517#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30518pub fn _mm512_cmpneq_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30519 _mm512_cmp_pd_mask::<_CMP_NEQ_UQ>(a, b)
30520}
30521
30522/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30523///
30524/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_pd_mask&expand=1128)
30525#[inline]
30526#[target_feature(enable = "avx512f")]
30527#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30528#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30529pub fn _mm512_mask_cmpneq_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30530 _mm512_mask_cmp_pd_mask::<_CMP_NEQ_UQ>(k1, a, b)
30531}
30532
30533/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
30534///
30535/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_pd_mask&expand=741)
30536#[inline]
30537#[target_feature(enable = "avx512f")]
30538#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30539#[rustc_legacy_const_generics(2)]
30540#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30541pub fn _mm512_cmp_pd_mask<const IMM8: i32>(a: __m512d, b: __m512d) -> __mmask8 {
30542 unsafe {
30543 static_assert_uimm_bits!(IMM8, 5);
30544 let neg_one: i8 = -1;
30545 let a: f64x8 = a.as_f64x8();
30546 let b: f64x8 = b.as_f64x8();
        let r: i8 = vcmppd(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION);
30548 r.cast_unsigned()
30549 }
30550}
30551
30552/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30553///
30554/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_pd_mask&expand=742)
30555#[inline]
30556#[target_feature(enable = "avx512f")]
30557#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30558#[rustc_legacy_const_generics(3)]
30559#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30560pub fn _mm512_mask_cmp_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30561 unsafe {
30562 static_assert_uimm_bits!(IMM8, 5);
30563 let a: f64x8 = a.as_f64x8();
30564 let b: f64x8 = b.as_f64x8();
        let r: i8 = vcmppd(a, b, IMM8, k1 as i8, _MM_FROUND_CUR_DIRECTION);
30566 r.cast_unsigned()
30567 }
30568}
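
// Usage sketch for the double-precision comparison masks (illustrative; `a` and `b` stand for
// any `__m512d` values and AVX-512F support is assumed):
//
//     // _CMP_NEQ_UQ is an unordered predicate: NaN lanes compare as "not equal".
//     let ne = _mm512_cmp_pd_mask::<_CMP_NEQ_UQ>(a, b);
//     // Restrict the comparison to the lanes selected by `k1`; other mask bits come out 0.
//     let k1: __mmask8 = 0b0000_1111;
//     let ne_lo = _mm512_mask_cmp_pd_mask::<_CMP_NEQ_UQ>(k1, a, b);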
30569
30570/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
30571///
30572/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_pd_mask&expand=739)
30573#[inline]
30574#[target_feature(enable = "avx512f,avx512vl")]
30575#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30576#[rustc_legacy_const_generics(2)]
30577#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30578pub fn _mm256_cmp_pd_mask<const IMM8: i32>(a: __m256d, b: __m256d) -> __mmask8 {
30579 unsafe {
30580 static_assert_uimm_bits!(IMM8, 5);
30581 let neg_one: i8 = -1;
30582 let a: f64x4 = a.as_f64x4();
30583 let b: f64x4 = b.as_f64x4();
        let r: i8 = vcmppd256(a, b, IMM8, neg_one);
30585 r.cast_unsigned()
30586 }
30587}
30588
30589/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30590///
30591/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_pd_mask&expand=740)
30592#[inline]
30593#[target_feature(enable = "avx512f,avx512vl")]
30594#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30595#[rustc_legacy_const_generics(3)]
30596#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30597pub fn _mm256_mask_cmp_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m256d, b: __m256d) -> __mmask8 {
30598 unsafe {
30599 static_assert_uimm_bits!(IMM8, 5);
30600 let a: f64x4 = a.as_f64x4();
30601 let b: f64x4 = b.as_f64x4();
        let r: i8 = vcmppd256(a, b, IMM8, k1 as i8);
30603 r.cast_unsigned()
30604 }
30605}
30606
30607/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
30608///
30609/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_pd_mask&expand=737)
30610#[inline]
30611#[target_feature(enable = "avx512f,avx512vl")]
30612#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30613#[rustc_legacy_const_generics(2)]
30614#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30615pub fn _mm_cmp_pd_mask<const IMM8: i32>(a: __m128d, b: __m128d) -> __mmask8 {
30616 unsafe {
30617 static_assert_uimm_bits!(IMM8, 5);
30618 let neg_one: i8 = -1;
30619 let a: f64x2 = a.as_f64x2();
30620 let b: f64x2 = b.as_f64x2();
        let r: i8 = vcmppd128(a, b, IMM8, neg_one);
30622 r.cast_unsigned()
30623 }
30624}
30625
30626/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30627///
30628/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_pd_mask&expand=738)
30629#[inline]
30630#[target_feature(enable = "avx512f,avx512vl")]
30631#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30632#[rustc_legacy_const_generics(3)]
30633#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30634pub fn _mm_mask_cmp_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m128d, b: __m128d) -> __mmask8 {
30635 unsafe {
30636 static_assert_uimm_bits!(IMM8, 5);
30637 let a: f64x2 = a.as_f64x2();
30638 let b: f64x2 = b.as_f64x2();
        let r: i8 = vcmppd128(a, b, IMM8, k1 as i8);
30640 r.cast_unsigned()
30641 }
30642}
30643
30644/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.\
30645/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30646///
30647/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_round_pd_mask&expand=751)
30648#[inline]
30649#[target_feature(enable = "avx512f")]
30650#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30651#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30652#[rustc_legacy_const_generics(2, 3)]
30653pub fn _mm512_cmp_round_pd_mask<const IMM5: i32, const SAE: i32>(
30654 a: __m512d,
30655 b: __m512d,
30656) -> __mmask8 {
30657 unsafe {
30658 static_assert_uimm_bits!(IMM5, 5);
30659 static_assert_mantissas_sae!(SAE);
30660 let neg_one: i8 = -1;
30661 let a: f64x8 = a.as_f64x8();
30662 let b: f64x8 = b.as_f64x8();
        let r: i8 = vcmppd(a, b, IMM5, neg_one, SAE);
30664 r.cast_unsigned()
30665 }
30666}
30667
30668/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).\
30669/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30670///
30671/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_round_pd_mask&expand=752)
30672#[inline]
30673#[target_feature(enable = "avx512f")]
30674#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30675#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30676#[rustc_legacy_const_generics(3, 4)]
30677pub fn _mm512_mask_cmp_round_pd_mask<const IMM5: i32, const SAE: i32>(
30678 k1: __mmask8,
30679 a: __m512d,
30680 b: __m512d,
30681) -> __mmask8 {
30682 unsafe {
30683 static_assert_uimm_bits!(IMM5, 5);
30684 static_assert_mantissas_sae!(SAE);
30685 let a: f64x8 = a.as_f64x8();
30686 let b: f64x8 = b.as_f64x8();
        let r: i8 = vcmppd(a, b, IMM5, k1 as i8, SAE);
30688 r.cast_unsigned()
30689 }
30690}
30691
30692/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k.
30693///
30694/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpord_pd_mask&expand=1159)
30695#[inline]
30696#[target_feature(enable = "avx512f")]
30697#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30698#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30699pub fn _mm512_cmpord_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30700 _mm512_cmp_pd_mask::<_CMP_ORD_Q>(a, b)
30701}
30702
30703/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30704///
30705/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpord_pd_mask&expand=1160)
30706#[inline]
30707#[target_feature(enable = "avx512f")]
30708#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30709#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30710pub fn _mm512_mask_cmpord_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30711 _mm512_mask_cmp_pd_mask::<_CMP_ORD_Q>(k1, a, b)
30712}
30713
30714/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k.
30715///
30716/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpunord_pd_mask&expand=1167)
30717#[inline]
30718#[target_feature(enable = "avx512f")]
30719#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30720#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30721pub fn _mm512_cmpunord_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30722 _mm512_cmp_pd_mask::<_CMP_UNORD_Q>(a, b)
30723}
30724
30725/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30726///
30727/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpunord_pd_mask&expand=1168)
30728#[inline]
30729#[target_feature(enable = "avx512f")]
30730#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30731#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30732pub fn _mm512_mask_cmpunord_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30733 _mm512_mask_cmp_pd_mask::<_CMP_UNORD_Q>(k1, a, b)
30734}
30735
30736/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.
30737///
30738/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_ss_mask&expand=763)
30739#[inline]
30740#[target_feature(enable = "avx512f")]
30741#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30742#[rustc_legacy_const_generics(2)]
30743#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30744pub fn _mm_cmp_ss_mask<const IMM8: i32>(a: __m128, b: __m128) -> __mmask8 {
30745 unsafe {
30746 static_assert_uimm_bits!(IMM8, 5);
30747 let neg_one: i8 = -1;
        let r: i8 = vcmpss(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION);
30749 r.cast_unsigned()
30750 }
30751}
30752
30753/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).
30754///
30755/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_ss_mask&expand=764)
30756#[inline]
30757#[target_feature(enable = "avx512f")]
30758#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30759#[rustc_legacy_const_generics(3)]
30760#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30761pub fn _mm_mask_cmp_ss_mask<const IMM8: i32>(k1: __mmask8, a: __m128, b: __m128) -> __mmask8 {
30762 unsafe {
30763 static_assert_uimm_bits!(IMM8, 5);
        let r: i8 = vcmpss(a, b, IMM8, k1 as i8, _MM_FROUND_CUR_DIRECTION);
30765 r.cast_unsigned()
30766 }
30767}
30768
30769/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.\
30770/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30771///
30772/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_round_ss_mask&expand=757)
30773#[inline]
30774#[target_feature(enable = "avx512f")]
30775#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30776#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30777#[rustc_legacy_const_generics(2, 3)]
30778pub fn _mm_cmp_round_ss_mask<const IMM5: i32, const SAE: i32>(a: __m128, b: __m128) -> __mmask8 {
30779 unsafe {
30780 static_assert_uimm_bits!(IMM5, 5);
30781 static_assert_mantissas_sae!(SAE);
30782 let neg_one: i8 = -1;
        let r: i8 = vcmpss(a, b, IMM5, neg_one, SAE);
30784 r.cast_unsigned()
30785 }
30786}
30787
/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).\
30789/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30790///
30791/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_round_ss_mask&expand=758)
30792#[inline]
30793#[target_feature(enable = "avx512f")]
30794#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30795#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30796#[rustc_legacy_const_generics(3, 4)]
30797pub fn _mm_mask_cmp_round_ss_mask<const IMM5: i32, const SAE: i32>(
30798 k1: __mmask8,
30799 a: __m128,
30800 b: __m128,
30801) -> __mmask8 {
30802 unsafe {
30803 static_assert_uimm_bits!(IMM5, 5);
30804 static_assert_mantissas_sae!(SAE);
        let r: i8 = vcmpss(a, b, IMM5, k1 as i8, SAE);
30806 r.cast_unsigned()
30807 }
30808}
30809
30810/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.
30811///
30812/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_sd_mask&expand=760)
30813#[inline]
30814#[target_feature(enable = "avx512f")]
30815#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30816#[rustc_legacy_const_generics(2)]
30817#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30818pub fn _mm_cmp_sd_mask<const IMM8: i32>(a: __m128d, b: __m128d) -> __mmask8 {
30819 unsafe {
30820 static_assert_uimm_bits!(IMM8, 5);
30821 let neg_one: i8 = -1;
        let r: i8 = vcmpsd(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION);
30823 r.cast_unsigned()
30824 }
30825}
30826
30827/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).
30828///
30829/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_sd_mask&expand=761)
30830#[inline]
30831#[target_feature(enable = "avx512f")]
30832#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30833#[rustc_legacy_const_generics(3)]
30834#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30835pub fn _mm_mask_cmp_sd_mask<const IMM8: i32>(k1: __mmask8, a: __m128d, b: __m128d) -> __mmask8 {
30836 unsafe {
30837 static_assert_uimm_bits!(IMM8, 5);
        let r: i8 = vcmpsd(a, b, IMM8, k1 as i8, _MM_FROUND_CUR_DIRECTION);
30839 r.cast_unsigned()
30840 }
30841}
30842
30843/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.\
30844/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30845///
30846/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_round_sd_mask&expand=755)
30847#[inline]
30848#[target_feature(enable = "avx512f")]
30849#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30850#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30851#[rustc_legacy_const_generics(2, 3)]
30852pub fn _mm_cmp_round_sd_mask<const IMM5: i32, const SAE: i32>(a: __m128d, b: __m128d) -> __mmask8 {
30853 unsafe {
30854 static_assert_uimm_bits!(IMM5, 5);
30855 static_assert_mantissas_sae!(SAE);
30856 let neg_one: i8 = -1;
        let r: i8 = vcmpsd(a, b, IMM5, neg_one, SAE);
30858 r.cast_unsigned()
30859 }
30860}
30861
30862/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).\
30863/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30864///
30865/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_round_sd_mask&expand=756)
30866#[inline]
30867#[target_feature(enable = "avx512f")]
30868#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30869#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30870#[rustc_legacy_const_generics(3, 4)]
30871pub fn _mm_mask_cmp_round_sd_mask<const IMM5: i32, const SAE: i32>(
30872 k1: __mmask8,
30873 a: __m128d,
30874 b: __m128d,
30875) -> __mmask8 {
30876 unsafe {
30877 static_assert_uimm_bits!(IMM5, 5);
30878 static_assert_mantissas_sae!(SAE);
        let r: i8 = vcmpsd(a, b, IMM5, k1 as i8, SAE);
30880 r.cast_unsigned()
30881 }
30882}
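
// Illustrative note (not from Intel's docs): the `_ss`/`_sd` variants above compare only the
// lowest element of their operands, so at most bit 0 of the returned mask can be set:
//
//     let x = _mm_set_sd(1.0);
//     let y = _mm_set_sd(2.0);
//     let k = _mm_cmp_sd_mask::<_CMP_LT_OS>(x, y); // k == 0b1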
30883
30884/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k.
30885///
30886/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epu32_mask&expand=1056)
30887#[inline]
30888#[target_feature(enable = "avx512f")]
30889#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30890#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30891pub fn _mm512_cmplt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    unsafe { simd_bitmask::<u32x16, _>(simd_lt(a.as_u32x16(), b.as_u32x16())) }
30893}
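
// Illustrative note (not from Intel's docs): the `_epu32` comparisons treat each lane as an
// unsigned 32-bit integer, so a lane holding -1 (i.e. 0xFFFF_FFFF) is *greater* than 1:
//
//     let a = _mm512_set1_epi32(-1);
//     let b = _mm512_set1_epi32(1);
//     assert_eq!(_mm512_cmplt_epu32_mask(a, b), 0);      // 0xFFFF_FFFF < 1 is false
//     assert_eq!(_mm512_cmpgt_epu32_mask(a, b), 0xFFFF); // and it is greater in every lane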
30894
30895/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30896///
30897/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epu32_mask&expand=1057)
30898#[inline]
30899#[target_feature(enable = "avx512f")]
30900#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30901#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30902pub fn _mm512_mask_cmplt_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
30903 _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(k1, a, b)
30904}
30905
30906/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k.
30907///
30908/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epu32_mask&expand=1054)
30909#[inline]
30910#[target_feature(enable = "avx512f,avx512vl")]
30911#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30912#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30913pub fn _mm256_cmplt_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<u32x8, _>(simd_lt(a.as_u32x8(), b.as_u32x8())) }
30915}
30916
30917/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30918///
30919/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epu32_mask&expand=1055)
30920#[inline]
30921#[target_feature(enable = "avx512f,avx512vl")]
30922#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30923#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30924pub fn _mm256_mask_cmplt_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
30925 _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(k1, a, b)
30926}
30927
30928/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k.
30929///
30930/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epu32_mask&expand=1052)
30931#[inline]
30932#[target_feature(enable = "avx512f,avx512vl")]
30933#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30934#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30935pub fn _mm_cmplt_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<u32x4, _>(simd_lt(a.as_u32x4(), b.as_u32x4())) }
30937}
30938
30939/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30940///
30941/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epu32_mask&expand=1053)
30942#[inline]
30943#[target_feature(enable = "avx512f,avx512vl")]
30944#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30945#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30946pub fn _mm_mask_cmplt_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
30947 _mm_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(k1, a, b)
30948}
30949
30950/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k.
30951///
30952/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epu32_mask&expand=933)
30953#[inline]
30954#[target_feature(enable = "avx512f")]
30955#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30956#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30957pub fn _mm512_cmpgt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
30958 unsafe { simd_bitmask::<u32x16, _>(simd_gt(x:a.as_u32x16(), y:b.as_u32x16())) }
30959}
30960
30961/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30962///
30963/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epu32_mask&expand=934)
30964#[inline]
30965#[target_feature(enable = "avx512f")]
30966#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30967#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30968pub fn _mm512_mask_cmpgt_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
30969 _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_NLE>(k1, a, b)
30970}
30971
30972/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k.
30973///
30974/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epu32_mask&expand=931)
30975#[inline]
30976#[target_feature(enable = "avx512f,avx512vl")]
30977#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30978#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30979pub fn _mm256_cmpgt_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
30980 unsafe { simd_bitmask::<u32x8, _>(simd_gt(x:a.as_u32x8(), y:b.as_u32x8())) }
30981}
30982
30983/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30984///
30985/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epu32_mask&expand=932)
30986#[inline]
30987#[target_feature(enable = "avx512f,avx512vl")]
30988#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30989#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30990pub fn _mm256_mask_cmpgt_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
30991 _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_NLE>(k1, a, b)
30992}
30993
30994/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k.
30995///
30996/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epu32_mask&expand=929)
30997#[inline]
30998#[target_feature(enable = "avx512f,avx512vl")]
30999#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31000#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31001pub fn _mm_cmpgt_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
31002 unsafe { simd_bitmask::<u32x4, _>(simd_gt(x:a.as_u32x4(), y:b.as_u32x4())) }
31003}
31004
31005/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31006///
31007/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epu32_mask&expand=930)
31008#[inline]
31009#[target_feature(enable = "avx512f,avx512vl")]
31010#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31011#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31012pub fn _mm_mask_cmpgt_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31013 _mm_mask_cmp_epu32_mask::<_MM_CMPINT_NLE>(k1, a, b)
31014}
31015
31016/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
31017///
31018/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epu32_mask&expand=995)
31019#[inline]
31020#[target_feature(enable = "avx512f")]
31021#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31022#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31023pub fn _mm512_cmple_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
31024 unsafe { simd_bitmask::<u32x16, _>(simd_le(x:a.as_u32x16(), y:b.as_u32x16())) }
31025}
31026
31027/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31028///
31029/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epu32_mask&expand=996)
31030#[inline]
31031#[target_feature(enable = "avx512f")]
31032#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31033#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31034pub fn _mm512_mask_cmple_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31035 _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_LE>(k1, a, b)
31036}
31037
31038/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
31039///
31040/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epu32_mask&expand=993)
31041#[inline]
31042#[target_feature(enable = "avx512f,avx512vl")]
31043#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31044#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31045pub fn _mm256_cmple_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
31046 unsafe { simd_bitmask::<u32x8, _>(simd_le(x:a.as_u32x8(), y:b.as_u32x8())) }
31047}
31048
31049/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31050///
31051/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epu32_mask&expand=994)
31052#[inline]
31053#[target_feature(enable = "avx512f,avx512vl")]
31054#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31055#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31056pub fn _mm256_mask_cmple_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31057 _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_LE>(k1, a, b)
31058}
31059
31060/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
31061///
31062/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epu32_mask&expand=991)
31063#[inline]
31064#[target_feature(enable = "avx512f,avx512vl")]
31065#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31066#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31067pub fn _mm_cmple_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
31068 unsafe { simd_bitmask::<u32x4, _>(simd_le(x:a.as_u32x4(), y:b.as_u32x4())) }
31069}
31070
31071/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31072///
31073/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epu32_mask&expand=992)
31074#[inline]
31075#[target_feature(enable = "avx512f,avx512vl")]
31076#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31077#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31078pub fn _mm_mask_cmple_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31079 _mm_mask_cmp_epu32_mask::<_MM_CMPINT_LE>(k1, a, b)
31080}
31081
31082/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
31083///
31084/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epu32_mask&expand=873)
31085#[inline]
31086#[target_feature(enable = "avx512f")]
31087#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31088#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31089pub fn _mm512_cmpge_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
31090 unsafe { simd_bitmask::<u32x16, _>(simd_ge(x:a.as_u32x16(), y:b.as_u32x16())) }
31091}
31092
31093/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31094///
31095/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epu32_mask&expand=874)
31096#[inline]
31097#[target_feature(enable = "avx512f")]
31098#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31099#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31100pub fn _mm512_mask_cmpge_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31101 _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_NLT>(k1, a, b)
31102}
31103
31104/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
31105///
31106/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epu32_mask&expand=871)
31107#[inline]
31108#[target_feature(enable = "avx512f,avx512vl")]
31109#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31110#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31111pub fn _mm256_cmpge_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
31112 unsafe { simd_bitmask::<u32x8, _>(simd_ge(x:a.as_u32x8(), y:b.as_u32x8())) }
31113}
31114
31115/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31116///
31117/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epu32_mask&expand=872)
31118#[inline]
31119#[target_feature(enable = "avx512f,avx512vl")]
31120#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31121#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31122pub fn _mm256_mask_cmpge_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31123 _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_NLT>(k1, a, b)
31124}
31125
31126/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
31127///
31128/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epu32_mask&expand=869)
31129#[inline]
31130#[target_feature(enable = "avx512f,avx512vl")]
31131#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31132#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31133pub fn _mm_cmpge_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
31134 unsafe { simd_bitmask::<u32x4, _>(simd_ge(x:a.as_u32x4(), y:b.as_u32x4())) }
31135}
31136
31137/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31138///
31139/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epu32_mask&expand=870)
31140#[inline]
31141#[target_feature(enable = "avx512f,avx512vl")]
31142#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31143#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31144pub fn _mm_mask_cmpge_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31145 _mm_mask_cmp_epu32_mask::<_MM_CMPINT_NLT>(k1, a, b)
31146}
31147
31148/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k.
31149///
31150/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epu32_mask&expand=807)
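///
/// Illustrative sketch (hypothetical values, assumes `avx512f`): mask bit
/// `i` corresponds to lane `i`, counting from the least-significant bit.
///
/// ```ignore
/// let a = _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
/// let b = _mm512_set1_epi32(2);
/// // Only lane 2 holds the value 2, so only bit 2 is set.
/// let k = _mm512_cmpeq_epu32_mask(a, b);
/// assert_eq!(k, 1 << 2);
/// ```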
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub fn _mm512_cmpeq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    unsafe { simd_bitmask::<u32x16, _>(simd_eq(a.as_u32x16(), b.as_u32x16())) }
}

/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epu32_mask&expand=808)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub fn _mm512_mask_cmpeq_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
    _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epu32_mask&expand=805)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub fn _mm256_cmpeq_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<u32x8, _>(simd_eq(a.as_u32x8(), b.as_u32x8())) }
}

/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epu32_mask&expand=806)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub fn _mm256_mask_cmpeq_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epu32_mask&expand=803)
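///
/// Illustrative sketch (hypothetical values, assumes `avx512f` and
/// `avx512vl`): a 128-bit vector has four 32-bit lanes, so only the low
/// 4 bits of the returned `__mmask8` are meaningful.
///
/// ```ignore
/// let a = _mm_set_epi32(0, 1, 2, 3);
/// let b = _mm_set_epi32(0, 9, 2, 9);
/// // Lanes 1 and 3 compare equal (lane 0 is the lowest element).
/// let k = _mm_cmpeq_epu32_mask(a, b);
/// assert_eq!(k, 0b1010);
/// ```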
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub fn _mm_cmpeq_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<u32x4, _>(simd_eq(a.as_u32x4(), b.as_u32x4())) }
}

/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epu32_mask&expand=804)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub fn _mm_mask_cmpeq_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epu32_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epu32_mask&expand=1112)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub fn _mm512_cmpneq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    unsafe { simd_bitmask::<u32x16, _>(simd_ne(a.as_u32x16(), b.as_u32x16())) }
}

/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epu32_mask&expand=1113)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub fn _mm512_mask_cmpneq_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
    _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_NE>(k1, a, b)
}

/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epu32_mask&expand=1110)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub fn _mm256_cmpneq_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<u32x8, _>(simd_ne(a.as_u32x8(), b.as_u32x8())) }
}

/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epu32_mask&expand=1111)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub fn _mm256_mask_cmpneq_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_NE>(k1, a, b)
}

/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epu32_mask&expand=1108)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub fn _mm_cmpneq_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<u32x4, _>(simd_ne(a.as_u32x4(), b.as_u32x4())) }
}

/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epu32_mask&expand=1109)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub fn _mm_mask_cmpneq_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epu32_mask::<_MM_CMPINT_NE>(k1, a, b)
}

/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epu32_mask&expand=721)
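///
/// Illustrative sketch (hypothetical values, assumes `avx512f`): the
/// `IMM3` constant selects the predicate, e.g. `_MM_CMPINT_LE`.
///
/// ```ignore
/// let a = _mm512_set1_epi32(5);
/// let b = _mm512_set1_epi32(5);
/// // 5 <= 5 holds in every lane.
/// let k = _mm512_cmp_epu32_mask::<_MM_CMPINT_LE>(a, b);
/// assert_eq!(k, 0xFFFF);
/// ```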
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
pub fn _mm512_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m512i, b: __m512i) -> __mmask16 {
    unsafe {
        static_assert_uimm_bits!(IMM3, 3);
        let a: u32x16 = a.as_u32x16();
        let b: u32x16 = b.as_u32x16();
        let r: i32x16 = match IMM3 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i32x16::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i32x16::splat(-1),
        };
        simd_bitmask(r)
    }
}

/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epu32_mask&expand=722)
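///
/// Illustrative sketch (hypothetical values, assumes `avx512f`): the lane
/// comparison is true everywhere, and the zeromask `k1` then clears the
/// upper half of the result.
///
/// ```ignore
/// let a = _mm512_set1_epi32(5);
/// let b = _mm512_set1_epi32(7);
/// let k = _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(0x00FF, a, b);
/// assert_eq!(k, 0x00FF);
/// ```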
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
pub fn _mm512_mask_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
    k1: __mmask16,
    a: __m512i,
    b: __m512i,
) -> __mmask16 {
    unsafe {
        static_assert_uimm_bits!(IMM3, 3);
        let a: u32x16 = a.as_u32x16();
        let b: u32x16 = b.as_u32x16();
        let k1: i32x16 = simd_select_bitmask(k1, i32x16::splat(-1), i32x16::ZERO);
        let r: i32x16 = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i32x16::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        simd_bitmask(r)
    }
}

/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epu32_mask&expand=719)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
pub fn _mm256_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM3, 3);
        let a: u32x8 = a.as_u32x8();
        let b: u32x8 = b.as_u32x8();
        let r: i32x8 = match IMM3 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i32x8::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i32x8::splat(-1),
        };
        simd_bitmask(r)
    }
}

/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epu32_mask&expand=720)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
pub fn _mm256_mask_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
    k1: __mmask8,
    a: __m256i,
    b: __m256i,
) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM3, 3);
        let a: u32x8 = a.as_u32x8();
        let b: u32x8 = b.as_u32x8();
        let k1: i32x8 = simd_select_bitmask(k1, i32x8::splat(-1), i32x8::ZERO);
        let r: i32x8 = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i32x8::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        simd_bitmask(r)
    }
}

/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epu32_mask&expand=717)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
pub fn _mm_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM3, 3);
        let a: u32x4 = a.as_u32x4();
        let b: u32x4 = b.as_u32x4();
        let r: i32x4 = match IMM3 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i32x4::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i32x4::splat(-1),
        };
        simd_bitmask(r)
    }
}

/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epu32_mask&expand=718)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
pub fn _mm_mask_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
    k1: __mmask8,
    a: __m128i,
    b: __m128i,
) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM3, 3);
        let a: u32x4 = a.as_u32x4();
        let b: u32x4 = b.as_u32x4();
        let k1: i32x4 = simd_select_bitmask(k1, i32x4::splat(-1), i32x4::ZERO);
        let r: i32x4 = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i32x4::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        simd_bitmask(r)
    }
}

/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epi32_mask&expand=1029)
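///
/// Illustrative sketch (hypothetical values, assumes `avx512f`): the signed
/// compare treats `-1` as less than `0`, whereas the unsigned counterpart
/// `_mm512_cmplt_epu32_mask` would treat it as `u32::MAX`.
///
/// ```ignore
/// let a = _mm512_set1_epi32(-1);
/// let b = _mm512_set1_epi32(0);
/// let k = _mm512_cmplt_epi32_mask(a, b);
/// assert_eq!(k, 0xFFFF);
/// ```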
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm512_cmplt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    unsafe { simd_bitmask::<i32x16, _>(simd_lt(a.as_i32x16(), b.as_i32x16())) }
}

/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epi32_mask&expand=1031)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm512_mask_cmplt_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(k1, a, b)
}

/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epi32_mask&expand=1027)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm256_cmplt_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<i32x8, _>(simd_lt(a.as_i32x8(), b.as_i32x8())) }
}

/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epi32_mask&expand=1028)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm256_mask_cmplt_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(k1, a, b)
}

/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi32_mask&expand=1025)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm_cmplt_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<i32x4, _>(simd_lt(a.as_i32x4(), b.as_i32x4())) }
}

/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epi32_mask&expand=1026)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm_mask_cmplt_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(k1, a, b)
}

/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epi32_mask&expand=905)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm512_cmpgt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    unsafe { simd_bitmask::<i32x16, _>(simd_gt(a.as_i32x16(), b.as_i32x16())) }
}

/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epi32_mask&expand=906)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm512_mask_cmpgt_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_NLE>(k1, a, b)
}

/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi32_mask&expand=903)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm256_cmpgt_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<i32x8, _>(simd_gt(a.as_i32x8(), b.as_i32x8())) }
}

/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epi32_mask&expand=904)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm256_mask_cmpgt_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_NLE>(k1, a, b)
}

/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi32_mask&expand=901)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm_cmpgt_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<i32x4, _>(simd_gt(a.as_i32x4(), b.as_i32x4())) }
}

/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epi32_mask&expand=902)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm_mask_cmpgt_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_NLE>(k1, a, b)
}

/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epi32_mask&expand=971)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm512_cmple_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    unsafe { simd_bitmask::<i32x16, _>(simd_le(a.as_i32x16(), b.as_i32x16())) }
}

/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epi32_mask&expand=972)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm512_mask_cmple_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_LE>(k1, a, b)
}

/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epi32_mask&expand=969)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm256_cmple_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<i32x8, _>(simd_le(a.as_i32x8(), b.as_i32x8())) }
}

/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epi32_mask&expand=970)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm256_mask_cmple_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_LE>(k1, a, b)
}

/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epi32_mask&expand=967)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm_cmple_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<i32x4, _>(simd_le(a.as_i32x4(), b.as_i32x4())) }
}

/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epi32_mask&expand=968)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm_mask_cmple_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_LE>(k1, a, b)
}

/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epi32_mask&expand=849)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm512_cmpge_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    unsafe { simd_bitmask::<i32x16, _>(simd_ge(a.as_i32x16(), b.as_i32x16())) }
}

/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epi32_mask&expand=850)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm512_mask_cmpge_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epi32_mask&expand=847)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm256_cmpge_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<i32x8, _>(simd_ge(a.as_i32x8(), b.as_i32x8())) }
}

/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epi32_mask&expand=848)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm256_mask_cmpge_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epi32_mask&expand=845)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm_cmpge_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<i32x4, _>(simd_ge(a.as_i32x4(), b.as_i32x4())) }
}

/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epi32_mask&expand=846)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm_mask_cmpge_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epi32_mask&expand=779)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm512_cmpeq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    unsafe { simd_bitmask::<i32x16, _>(simd_eq(a.as_i32x16(), b.as_i32x16())) }
}

/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epi32_mask&expand=780)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm512_mask_cmpeq_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi32_mask&expand=777)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm256_cmpeq_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<i32x8, _>(simd_eq(a.as_i32x8(), b.as_i32x8())) }
}

/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epi32_mask&expand=778)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm256_mask_cmpeq_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi32_mask&expand=775)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm_cmpeq_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<i32x4, _>(simd_eq(a.as_i32x4(), b.as_i32x4())) }
}

/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epi32_mask&expand=776)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm_mask_cmpeq_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epi32_mask&expand=1088)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm512_cmpneq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    unsafe { simd_bitmask::<i32x16, _>(simd_ne(a.as_i32x16(), b.as_i32x16())) }
}

/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epi32_mask&expand=1089)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm512_mask_cmpneq_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_NE>(k1, a, b)
}

/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epi32_mask&expand=1086)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm256_cmpneq_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<i32x8, _>(simd_ne(a.as_i32x8(), b.as_i32x8())) }
}

/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epi32_mask&expand=1087)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm256_mask_cmpneq_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_NE>(k1, a, b)
}

/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epi32_mask&expand=1084)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm_cmpneq_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<i32x4, _>(simd_ne(a.as_i32x4(), b.as_i32x4())) }
}

/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epi32_mask&expand=1085)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm_mask_cmpneq_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_NE>(k1, a, b)
}

/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epi32_mask&expand=697)
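///
/// Illustrative sketch (hypothetical values, assumes `avx512f`):
/// `_MM_CMPINT_NLT` keeps lanes where `a >= b` under signed ordering.
///
/// ```ignore
/// let a = _mm512_set1_epi32(-1);
/// let b = _mm512_set1_epi32(0);
/// // -1 >= 0 is false in every lane.
/// let k = _mm512_cmp_epi32_mask::<_MM_CMPINT_NLT>(a, b);
/// assert_eq!(k, 0);
/// ```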
31856#[inline]
31857#[target_feature(enable = "avx512f")]
31858#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31859#[rustc_legacy_const_generics(2)]
31860#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31861pub fn _mm512_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m512i, b: __m512i) -> __mmask16 {
31862 unsafe {
31863 static_assert_uimm_bits!(IMM3, 3);
31864 let a: i32x16 = a.as_i32x16();
31865 let b: i32x16 = b.as_i32x16();
31866 let r: i32x16 = match IMM3 {
31867 0 => simd_eq(x:a, y:b),
31868 1 => simd_lt(x:a, y:b),
31869 2 => simd_le(x:a, y:b),
31870 3 => i32x16::ZERO,
31871 4 => simd_ne(x:a, y:b),
31872 5 => simd_ge(x:a, y:b),
31873 6 => simd_gt(x:a, y:b),
31874 _ => i32x16::splat(-1),
31875 };
31876 simd_bitmask(r)
31877 }
31878}
31879
31880/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31881///
31882/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epi32_mask&expand=698)
31883#[inline]
31884#[target_feature(enable = "avx512f")]
31885#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31886#[rustc_legacy_const_generics(3)]
31887#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31888pub fn _mm512_mask_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
31889 k1: __mmask16,
31890 a: __m512i,
31891 b: __m512i,
31892) -> __mmask16 {
31893 unsafe {
31894 static_assert_uimm_bits!(IMM3, 3);
31895 let a: i32x16 = a.as_i32x16();
31896 let b: i32x16 = b.as_i32x16();
31897 let k1: i32x16 = simd_select_bitmask(m:k1, yes:i32x16::splat(-1), no:i32x16::ZERO);
31898 let r: i32x16 = match IMM3 {
31899 0 => simd_and(x:k1, y:simd_eq(x:a, y:b)),
31900 1 => simd_and(x:k1, y:simd_lt(x:a, y:b)),
31901 2 => simd_and(x:k1, y:simd_le(x:a, y:b)),
31902 3 => i32x16::ZERO,
31903 4 => simd_and(x:k1, y:simd_ne(x:a, y:b)),
31904 5 => simd_and(x:k1, y:simd_ge(x:a, y:b)),
31905 6 => simd_and(x:k1, y:simd_gt(x:a, y:b)),
31906 _ => k1,
31907 };
31908 simd_bitmask(r)
31909 }
31910}
31911
31912/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
31913///
31914/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=#text=_mm256_cmp_epi32_mask&expand=695)
31915#[inline]
31916#[target_feature(enable = "avx512f,avx512vl")]
31917#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31918#[rustc_legacy_const_generics(2)]
31919#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31920pub fn _mm256_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m256i, b: __m256i) -> __mmask8 {
31921 unsafe {
31922 static_assert_uimm_bits!(IMM3, 3);
31923 let a: i32x8 = a.as_i32x8();
31924 let b: i32x8 = b.as_i32x8();
31925 let r: i32x8 = match IMM3 {
31926 0 => simd_eq(x:a, y:b),
31927 1 => simd_lt(x:a, y:b),
31928 2 => simd_le(x:a, y:b),
31929 3 => i32x8::ZERO,
31930 4 => simd_ne(x:a, y:b),
31931 5 => simd_ge(x:a, y:b),
31932 6 => simd_gt(x:a, y:b),
31933 _ => i32x8::splat(-1),
31934 };
31935 simd_bitmask(r)
31936 }
31937}
31938
31939/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31940///
31941/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epi32_mask&expand=696)
31942#[inline]
31943#[target_feature(enable = "avx512f,avx512vl")]
31944#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31945#[rustc_legacy_const_generics(3)]
31946#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31947pub fn _mm256_mask_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
31948 k1: __mmask8,
31949 a: __m256i,
31950 b: __m256i,
31951) -> __mmask8 {
31952 unsafe {
31953 static_assert_uimm_bits!(IMM3, 3);
31954 let a: i32x8 = a.as_i32x8();
31955 let b: i32x8 = b.as_i32x8();
        let k1: i32x8 = simd_select_bitmask(k1, i32x8::splat(-1), i32x8::ZERO);
        let r: i32x8 = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i32x8::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
31965 _ => k1,
31966 };
31967 simd_bitmask(r)
31968 }
31969}
31970
31971/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
31972///
31973/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epi32_mask&expand=693)
31974#[inline]
31975#[target_feature(enable = "avx512f,avx512vl")]
31976#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31977#[rustc_legacy_const_generics(2)]
31978#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31979pub fn _mm_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
31980 unsafe {
31981 static_assert_uimm_bits!(IMM3, 3);
31982 let a: i32x4 = a.as_i32x4();
31983 let b: i32x4 = b.as_i32x4();
31984 let r: i32x4 = match IMM3 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i32x4::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i32x4::splat(-1),
31993 };
31994 simd_bitmask(r)
31995 }
31996}
31997
31998/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31999///
32000/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epi32_mask&expand=694)
32001#[inline]
32002#[target_feature(enable = "avx512f,avx512vl")]
32003#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32004#[rustc_legacy_const_generics(3)]
32005#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32006pub fn _mm_mask_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
32007 k1: __mmask8,
32008 a: __m128i,
32009 b: __m128i,
32010) -> __mmask8 {
32011 unsafe {
32012 static_assert_uimm_bits!(IMM3, 3);
32013 let a: i32x4 = a.as_i32x4();
32014 let b: i32x4 = b.as_i32x4();
        let k1: i32x4 = simd_select_bitmask(k1, i32x4::splat(-1), i32x4::ZERO);
        let r: i32x4 = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i32x4::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
32024 _ => k1,
32025 };
32026 simd_bitmask(r)
32027 }
32028}
32029
32030/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k.
32031///
32032/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epu64_mask&expand=1062)
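///
/// A small illustrative sketch (not compiled as a doctest; assumes AVX-512F support and
/// the unstable `stdarch_x86_avx512` feature). The comparison is unsigned, so `-1`
/// (all bits set) is treated as the largest 64-bit value:
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi64(1);
///     let b = _mm512_set1_epi64(-1); // u64::MAX when reinterpreted as unsigned
///     // Every lane satisfies 1 < u64::MAX, so all eight mask bits are set.
///     assert_eq!(_mm512_cmplt_epu64_mask(a, b), 0xff);
/// }
/// ```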
32033#[inline]
32034#[target_feature(enable = "avx512f")]
32035#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32036#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32037pub fn _mm512_cmplt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    unsafe { simd_bitmask::<__m512i, _>(simd_lt(a.as_u64x8(), b.as_u64x8())) }
32039}
32040
32041/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32042///
32043/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epu64_mask&expand=1063)
32044#[inline]
32045#[target_feature(enable = "avx512f")]
32046#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32047#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32048pub fn _mm512_mask_cmplt_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32049 _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(k1, a, b)
32050}
32051
32052/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k.
32053///
32054/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epu64_mask&expand=1060)
32055#[inline]
32056#[target_feature(enable = "avx512f,avx512vl")]
32057#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32058#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32059pub fn _mm256_cmplt_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<__m256i, _>(simd_lt(a.as_u64x4(), b.as_u64x4())) }
32061}
32062
32063/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32064///
32065/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epu64_mask&expand=1061)
32066#[inline]
32067#[target_feature(enable = "avx512f,avx512vl")]
32068#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32069#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32070pub fn _mm256_mask_cmplt_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32071 _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(k1, a, b)
32072}
32073
32074/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k.
32075///
32076/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epu64_mask&expand=1058)
32077#[inline]
32078#[target_feature(enable = "avx512f,avx512vl")]
32079#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32080#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32081pub fn _mm_cmplt_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<__m128i, _>(simd_lt(a.as_u64x2(), b.as_u64x2())) }
32083}
32084
32085/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32086///
32087/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epu64_mask&expand=1059)
32088#[inline]
32089#[target_feature(enable = "avx512f,avx512vl")]
32090#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32091#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32092pub fn _mm_mask_cmplt_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32093 _mm_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(k1, a, b)
32094}
32095
32096/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k.
32097///
32098/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epu64_mask&expand=939)
32099#[inline]
32100#[target_feature(enable = "avx512f")]
32101#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32102#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32103pub fn _mm512_cmpgt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    unsafe { simd_bitmask::<__m512i, _>(simd_gt(a.as_u64x8(), b.as_u64x8())) }
32105}
32106
32107/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32108///
32109/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epu64_mask&expand=940)
32110#[inline]
32111#[target_feature(enable = "avx512f")]
32112#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32113#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32114pub fn _mm512_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32115 _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_NLE>(k1, a, b)
32116}
32117
32118/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k.
32119///
32120/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epu64_mask&expand=937)
32121#[inline]
32122#[target_feature(enable = "avx512f,avx512vl")]
32123#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32124#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32125pub fn _mm256_cmpgt_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<__m256i, _>(simd_gt(a.as_u64x4(), b.as_u64x4())) }
32127}
32128
32129/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32130///
32131/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epu64_mask&expand=938)
32132#[inline]
32133#[target_feature(enable = "avx512f,avx512vl")]
32134#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32135#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32136pub fn _mm256_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32137 _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_NLE>(k1, a, b)
32138}
32139
32140/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k.
32141///
32142/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epu64_mask&expand=935)
32143#[inline]
32144#[target_feature(enable = "avx512f,avx512vl")]
32145#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32146#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32147pub fn _mm_cmpgt_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<__m128i, _>(simd_gt(a.as_u64x2(), b.as_u64x2())) }
32149}
32150
32151/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32152///
32153/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epu64_mask&expand=936)
32154#[inline]
32155#[target_feature(enable = "avx512f,avx512vl")]
32156#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32157#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32158pub fn _mm_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32159 _mm_mask_cmp_epu64_mask::<_MM_CMPINT_NLE>(k1, a, b)
32160}
32161
32162/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
32163///
32164/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epu64_mask&expand=1001)
32165#[inline]
32166#[target_feature(enable = "avx512f")]
32167#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32168#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32169pub fn _mm512_cmple_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    unsafe { simd_bitmask::<__m512i, _>(simd_le(a.as_u64x8(), b.as_u64x8())) }
32171}
32172
32173/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32174///
32175/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epu64_mask&expand=1002)
32176#[inline]
32177#[target_feature(enable = "avx512f")]
32178#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32179#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32180pub fn _mm512_mask_cmple_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32181 _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_LE>(k1, a, b)
32182}
32183
32184/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
32185///
32186/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epu64_mask&expand=999)
32187#[inline]
32188#[target_feature(enable = "avx512f,avx512vl")]
32189#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32190#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32191pub fn _mm256_cmple_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<__m256i, _>(simd_le(a.as_u64x4(), b.as_u64x4())) }
32193}
32194
32195/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32196///
32197/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epu64_mask&expand=1000)
32198#[inline]
32199#[target_feature(enable = "avx512f,avx512vl")]
32200#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32201#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32202pub fn _mm256_mask_cmple_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32203 _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_LE>(k1, a, b)
32204}
32205
32206/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
32207///
32208/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epu64_mask&expand=997)
32209#[inline]
32210#[target_feature(enable = "avx512f,avx512vl")]
32211#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32212#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32213pub fn _mm_cmple_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<__m128i, _>(simd_le(a.as_u64x2(), b.as_u64x2())) }
32215}
32216
32217/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32218///
32219/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epu64_mask&expand=998)
32220#[inline]
32221#[target_feature(enable = "avx512f,avx512vl")]
32222#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32223#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32224pub fn _mm_mask_cmple_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32225 _mm_mask_cmp_epu64_mask::<_MM_CMPINT_LE>(k1, a, b)
32226}
32227
32228/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
32229///
32230/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epu64_mask&expand=879)
32231#[inline]
32232#[target_feature(enable = "avx512f")]
32233#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32234#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32235pub fn _mm512_cmpge_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    unsafe { simd_bitmask::<__m512i, _>(simd_ge(a.as_u64x8(), b.as_u64x8())) }
32237}
32238
32239/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32240///
32241/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epu64_mask&expand=880)
32242#[inline]
32243#[target_feature(enable = "avx512f")]
32244#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32245#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32246pub fn _mm512_mask_cmpge_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32247 _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_NLT>(k1, a, b)
32248}
32249
32250/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
32251///
32252/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epu64_mask&expand=877)
32253#[inline]
32254#[target_feature(enable = "avx512f,avx512vl")]
32255#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32256#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32257pub fn _mm256_cmpge_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<__m256i, _>(simd_ge(a.as_u64x4(), b.as_u64x4())) }
32259}
32260
32261/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32262///
32263/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epu64_mask&expand=878)
32264#[inline]
32265#[target_feature(enable = "avx512f,avx512vl")]
32266#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32267#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32268pub fn _mm256_mask_cmpge_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32269 _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_NLT>(k1, a, b)
32270}
32271
32272/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
32273///
32274/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epu64_mask&expand=875)
32275#[inline]
32276#[target_feature(enable = "avx512f,avx512vl")]
32277#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32278#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32279pub fn _mm_cmpge_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<__m128i, _>(simd_ge(a.as_u64x2(), b.as_u64x2())) }
32281}
32282
32283/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32284///
32285/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epu64_mask&expand=876)
32286#[inline]
32287#[target_feature(enable = "avx512f,avx512vl")]
32288#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32289#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32290pub fn _mm_mask_cmpge_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32291 _mm_mask_cmp_epu64_mask::<_MM_CMPINT_NLT>(k1, a, b)
32292}
32293
32294/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k.
32295///
32296/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epu64_mask&expand=813)
32297#[inline]
32298#[target_feature(enable = "avx512f")]
32299#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32300#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32301pub fn _mm512_cmpeq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    unsafe { simd_bitmask::<__m512i, _>(simd_eq(a.as_u64x8(), b.as_u64x8())) }
32303}
32304
32305/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32306///
32307/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epu64_mask&expand=814)
32308#[inline]
32309#[target_feature(enable = "avx512f")]
32310#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32311#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32312pub fn _mm512_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32313 _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_EQ>(k1, a, b)
32314}
32315
32316/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k.
32317///
32318/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epu64_mask&expand=811)
32319#[inline]
32320#[target_feature(enable = "avx512f,avx512vl")]
32321#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32322#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32323pub fn _mm256_cmpeq_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<__m256i, _>(simd_eq(a.as_u64x4(), b.as_u64x4())) }
32325}
32326
32327/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32328///
32329/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epu64_mask&expand=812)
32330#[inline]
32331#[target_feature(enable = "avx512f,avx512vl")]
32332#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32333#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32334pub fn _mm256_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32335 _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_EQ>(k1, a, b)
32336}
32337
32338/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k.
32339///
32340/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epu64_mask&expand=809)
32341#[inline]
32342#[target_feature(enable = "avx512f,avx512vl")]
32343#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32344#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32345pub fn _mm_cmpeq_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<__m128i, _>(simd_eq(a.as_u64x2(), b.as_u64x2())) }
32347}
32348
32349/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32350///
32351/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epu64_mask&expand=810)
32352#[inline]
32353#[target_feature(enable = "avx512f,avx512vl")]
32354#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32355#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32356pub fn _mm_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32357 _mm_mask_cmp_epu64_mask::<_MM_CMPINT_EQ>(k1, a, b)
32358}
32359
32360/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k.
32361///
32362/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epu64_mask&expand=1118)
32363#[inline]
32364#[target_feature(enable = "avx512f")]
32365#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32366#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32367pub fn _mm512_cmpneq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    unsafe { simd_bitmask::<__m512i, _>(simd_ne(a.as_u64x8(), b.as_u64x8())) }
32369}
32370
32371/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32372///
32373/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epu64_mask&expand=1119)
32374#[inline]
32375#[target_feature(enable = "avx512f")]
32376#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32377#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32378pub fn _mm512_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32379 _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_NE>(k1, a, b)
32380}
32381
32382/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k.
32383///
32384/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epu64_mask&expand=1116)
32385#[inline]
32386#[target_feature(enable = "avx512f,avx512vl")]
32387#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32388#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32389pub fn _mm256_cmpneq_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<__m256i, _>(simd_ne(a.as_u64x4(), b.as_u64x4())) }
32391}
32392
32393/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32394///
32395/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epu64_mask&expand=1117)
32396#[inline]
32397#[target_feature(enable = "avx512f,avx512vl")]
32398#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32399#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32400pub fn _mm256_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32401 _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_NE>(k1, a, b)
32402}
32403
32404/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k.
32405///
32406/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epu64_mask&expand=1114)
32407#[inline]
32408#[target_feature(enable = "avx512f,avx512vl")]
32409#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32410#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32411pub fn _mm_cmpneq_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<__m128i, _>(simd_ne(a.as_u64x2(), b.as_u64x2())) }
32413}
32414
32415/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32416///
32417/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epu64_mask&expand=1115)
32418#[inline]
32419#[target_feature(enable = "avx512f,avx512vl")]
32420#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32421#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32422pub fn _mm_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32423 _mm_mask_cmp_epu64_mask::<_MM_CMPINT_NE>(k1, a, b)
32424}
32425
32426/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
32427///
32428/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epu64_mask&expand=727)
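///
/// An illustrative sketch (not compiled as a doctest; assumes AVX-512F and the unstable
/// `stdarch_x86_avx512` feature) showing the comparison predicate selected through the
/// const generic:
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi64(5);
///     let b = _mm512_set1_epi64(5);
///     // _MM_CMPINT_LE selects "less than or equal", so all lanes match.
///     assert_eq!(_mm512_cmp_epu64_mask::<_MM_CMPINT_LE>(a, b), 0xff);
///     // _MM_CMPINT_FALSE always yields an empty mask.
///     assert_eq!(_mm512_cmp_epu64_mask::<_MM_CMPINT_FALSE>(a, b), 0);
/// }
/// ```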
32429#[inline]
32430#[target_feature(enable = "avx512f")]
32431#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32432#[rustc_legacy_const_generics(2)]
32433#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32434pub fn _mm512_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m512i, b: __m512i) -> __mmask8 {
32435 unsafe {
32436 static_assert_uimm_bits!(IMM3, 3);
32437 let a: u64x8 = a.as_u64x8();
32438 let b: u64x8 = b.as_u64x8();
32439 let r: i64x8 = match IMM3 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i64x8::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
32447 _ => i64x8::splat(-1),
32448 };
32449 simd_bitmask(r)
32450 }
32451}
32452
32453/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32454///
32455/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epu64_mask&expand=728)
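///
/// An illustrative sketch (not compiled as a doctest; assumes AVX-512F and the unstable
/// `stdarch_x86_avx512` feature). Bits cleared in `k1` stay cleared in the result:
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi64(7);
///     let b = _mm512_set1_epi64(7);
///     // All lanes compare equal, but the zeromask keeps only the low four bits.
///     assert_eq!(_mm512_mask_cmp_epu64_mask::<_MM_CMPINT_EQ>(0x0f, a, b), 0x0f);
/// }
/// ```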
32456#[inline]
32457#[target_feature(enable = "avx512f")]
32458#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32459#[rustc_legacy_const_generics(3)]
32460#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32461pub fn _mm512_mask_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
32462 k1: __mmask8,
32463 a: __m512i,
32464 b: __m512i,
32465) -> __mmask8 {
32466 unsafe {
32467 static_assert_uimm_bits!(IMM3, 3);
32468 let a: u64x8 = a.as_u64x8();
32469 let b: u64x8 = b.as_u64x8();
        let k1: i64x8 = simd_select_bitmask(k1, i64x8::splat(-1), i64x8::ZERO);
        let r: i64x8 = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i64x8::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
32479 _ => k1,
32480 };
32481 simd_bitmask(r)
32482 }
32483}
32484
32485/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
32486///
32487/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epu64_mask&expand=725)
32488#[inline]
32489#[target_feature(enable = "avx512f,avx512vl")]
32490#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32491#[rustc_legacy_const_generics(2)]
32492#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32493pub fn _mm256_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m256i, b: __m256i) -> __mmask8 {
32494 unsafe {
32495 static_assert_uimm_bits!(IMM3, 3);
32496 let a: u64x4 = a.as_u64x4();
32497 let b: u64x4 = b.as_u64x4();
32498 let r: i64x4 = match IMM3 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i64x4::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
32506 _ => i64x4::splat(-1),
32507 };
32508 simd_bitmask(r)
32509 }
32510}
32511
32512/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32513///
32514/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epu64_mask&expand=726)
32515#[inline]
32516#[target_feature(enable = "avx512f,avx512vl")]
32517#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32518#[rustc_legacy_const_generics(3)]
32519#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32520pub fn _mm256_mask_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
32521 k1: __mmask8,
32522 a: __m256i,
32523 b: __m256i,
32524) -> __mmask8 {
32525 unsafe {
32526 static_assert_uimm_bits!(IMM3, 3);
32527 let a: u64x4 = a.as_u64x4();
32528 let b: u64x4 = b.as_u64x4();
        let k1: i64x4 = simd_select_bitmask(k1, i64x4::splat(-1), i64x4::ZERO);
        let r: i64x4 = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i64x4::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
32538 _ => k1,
32539 };
32540 simd_bitmask(r)
32541 }
32542}
32543
32544/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
32545///
32546/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epu64_mask&expand=723)
32547#[inline]
32548#[target_feature(enable = "avx512f,avx512vl")]
32549#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32550#[rustc_legacy_const_generics(2)]
32551#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32552pub fn _mm_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
32553 unsafe {
32554 static_assert_uimm_bits!(IMM3, 3);
32555 let a: u64x2 = a.as_u64x2();
32556 let b: u64x2 = b.as_u64x2();
32557 let r: i64x2 = match IMM3 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i64x2::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
32565 _ => i64x2::splat(-1),
32566 };
32567 simd_bitmask(r)
32568 }
32569}
32570
32571/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32572///
32573/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epu64_mask&expand=724)
32574#[inline]
32575#[target_feature(enable = "avx512f,avx512vl")]
32576#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32577#[rustc_legacy_const_generics(3)]
32578#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32579pub fn _mm_mask_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
32580 k1: __mmask8,
32581 a: __m128i,
32582 b: __m128i,
32583) -> __mmask8 {
32584 unsafe {
32585 static_assert_uimm_bits!(IMM3, 3);
32586 let a: u64x2 = a.as_u64x2();
32587 let b: u64x2 = b.as_u64x2();
        let k1: i64x2 = simd_select_bitmask(k1, i64x2::splat(-1), i64x2::ZERO);
        let r: i64x2 = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i64x2::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
32597 _ => k1,
32598 };
32599 simd_bitmask(r)
32600 }
32601}
32602
32603/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k.
32604///
32605/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epi64_mask&expand=1037)
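///
/// A brief illustrative sketch (not compiled as a doctest; assumes AVX-512F and the
/// unstable `stdarch_x86_avx512` feature). The comparison is signed, so `-1` compares
/// below `1`:
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi64(-1);
///     let b = _mm512_set1_epi64(1);
///     // -1 < 1 in every lane under signed comparison, so all eight bits are set.
///     assert_eq!(_mm512_cmplt_epi64_mask(a, b), 0xff);
/// }
/// ```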
32606#[inline]
32607#[target_feature(enable = "avx512f")]
32608#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32609#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32610pub fn _mm512_cmplt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    unsafe { simd_bitmask::<__m512i, _>(simd_lt(a.as_i64x8(), b.as_i64x8())) }
32612}
32613
32614/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32615///
32616/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epi64_mask&expand=1038)
32617#[inline]
32618#[target_feature(enable = "avx512f")]
32619#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32620#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32621pub fn _mm512_mask_cmplt_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32622 _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(k1, a, b)
32623}
32624
32625/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k.
32626///
32627/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epi64_mask&expand=1035)
32628#[inline]
32629#[target_feature(enable = "avx512f,avx512vl")]
32630#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32631#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32632pub fn _mm256_cmplt_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<__m256i, _>(simd_lt(a.as_i64x4(), b.as_i64x4())) }
32634}
32635
32636/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32637///
32638/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epi64_mask&expand=1036)
32639#[inline]
32640#[target_feature(enable = "avx512f,avx512vl")]
32641#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32642#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32643pub fn _mm256_mask_cmplt_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32644 _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(k1, a, b)
32645}
32646
32647/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k.
32648///
32649/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi64_mask&expand=1033)
32650#[inline]
32651#[target_feature(enable = "avx512f,avx512vl")]
32652#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32653#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32654pub fn _mm_cmplt_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<__m128i, _>(simd_lt(a.as_i64x2(), b.as_i64x2())) }
32656}
32657
32658/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32659///
32660/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epi64_mask&expand=1034)
32661#[inline]
32662#[target_feature(enable = "avx512f,avx512vl")]
32663#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32664#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32665pub fn _mm_mask_cmplt_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32666 _mm_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(k1, a, b)
32667}
32668
32669/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k.
32670///
32671/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epi64_mask&expand=913)
32672#[inline]
32673#[target_feature(enable = "avx512f")]
32674#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32675#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32676pub fn _mm512_cmpgt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    unsafe { simd_bitmask::<__m512i, _>(simd_gt(a.as_i64x8(), b.as_i64x8())) }
32678}
32679
32680/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32681///
32682/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epi64_mask&expand=914)
32683#[inline]
32684#[target_feature(enable = "avx512f")]
32685#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32686#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32687pub fn _mm512_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32688 _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_NLE>(k1, a, b)
32689}
32690
32691/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k.
32692///
32693/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi64_mask&expand=911)
32694#[inline]
32695#[target_feature(enable = "avx512f,avx512vl")]
32696#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32697#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32698pub fn _mm256_cmpgt_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<__m256i, _>(simd_gt(a.as_i64x4(), b.as_i64x4())) }
32700}
32701
32702/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32703///
32704/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epi64_mask&expand=912)
32705#[inline]
32706#[target_feature(enable = "avx512f,avx512vl")]
32707#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32708#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32709pub fn _mm256_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32710 _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_NLE>(k1, a, b)
32711}
32712
32713/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k.
32714///
32715/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi64_mask&expand=909)
32716#[inline]
32717#[target_feature(enable = "avx512f,avx512vl")]
32718#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32719#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32720pub fn _mm_cmpgt_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<__m128i, _>(simd_gt(a.as_i64x2(), b.as_i64x2())) }
32722}
32723
32724/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32725///
32726/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epi64_mask&expand=910)
32727#[inline]
32728#[target_feature(enable = "avx512f,avx512vl")]
32729#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32730#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32731pub fn _mm_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32732 _mm_mask_cmp_epi64_mask::<_MM_CMPINT_NLE>(k1, a, b)
32733}
32734
32735/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
32736///
32737/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epi64_mask&expand=977)
32738#[inline]
32739#[target_feature(enable = "avx512f")]
32740#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32741#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32742pub fn _mm512_cmple_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    unsafe { simd_bitmask::<__m512i, _>(simd_le(a.as_i64x8(), b.as_i64x8())) }
32744}
32745
32746/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32747///
32748/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epi64_mask&expand=978)
32749#[inline]
32750#[target_feature(enable = "avx512f")]
32751#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32752#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32753pub fn _mm512_mask_cmple_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32754 _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_LE>(k1, a, b)
32755}
32756
32757/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
32758///
32759/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epi64_mask&expand=975)
32760#[inline]
32761#[target_feature(enable = "avx512f,avx512vl")]
32762#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32763#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32764pub fn _mm256_cmple_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<__m256i, _>(simd_le(a.as_i64x4(), b.as_i64x4())) }
32766}
32767
32768/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32769///
32770/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epi64_mask&expand=976)
32771#[inline]
32772#[target_feature(enable = "avx512f,avx512vl")]
32773#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32774#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32775pub fn _mm256_mask_cmple_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32776 _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_LE>(k1, a, b)
32777}
32778
32779/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
32780///
32781/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epi64_mask&expand=973)
32782#[inline]
32783#[target_feature(enable = "avx512f,avx512vl")]
32784#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32785#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32786pub fn _mm_cmple_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<__m128i, _>(simd_le(a.as_i64x2(), b.as_i64x2())) }
32788}
32789
32790/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32791///
32792/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epi64_mask&expand=974)
32793#[inline]
32794#[target_feature(enable = "avx512f,avx512vl")]
32795#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32796#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32797pub fn _mm_mask_cmple_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32798 _mm_mask_cmp_epi64_mask::<_MM_CMPINT_LE>(k1, a, b)
32799}
32800
32801/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
32802///
32803/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epi64_mask&expand=855)
32804#[inline]
32805#[target_feature(enable = "avx512f")]
32806#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32807#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32808pub fn _mm512_cmpge_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    unsafe { simd_bitmask::<__m512i, _>(simd_ge(a.as_i64x8(), b.as_i64x8())) }
32810}
32811
32812/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32813///
32814/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epi64_mask&expand=856)
32815#[inline]
32816#[target_feature(enable = "avx512f")]
32817#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32818#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32819pub fn _mm512_mask_cmpge_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32820 _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_NLT>(k1, a, b)
32821}
32822
32823/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
32824///
32825/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epi64_mask&expand=853)
32826#[inline]
32827#[target_feature(enable = "avx512f,avx512vl")]
32828#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32829#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32830pub fn _mm256_cmpge_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<__m256i, _>(simd_ge(a.as_i64x4(), b.as_i64x4())) }
32832}
32833
32834/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32835///
32836/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epi64_mask&expand=854)
32837#[inline]
32838#[target_feature(enable = "avx512f,avx512vl")]
32839#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32840#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32841pub fn _mm256_mask_cmpge_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32842 _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_NLT>(k1, a, b)
32843}
32844
32845/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
32846///
32847/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epi64_mask&expand=851)
32848#[inline]
32849#[target_feature(enable = "avx512f,avx512vl")]
32850#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32851#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32852pub fn _mm_cmpge_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<__m128i, _>(simd_ge(a.as_i64x2(), b.as_i64x2())) }
32854}
32855
32856/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32857///
32858/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epi64_mask&expand=852)
32859#[inline]
32860#[target_feature(enable = "avx512f,avx512vl")]
32861#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32862#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32863pub fn _mm_mask_cmpge_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32864 _mm_mask_cmp_epi64_mask::<_MM_CMPINT_NLT>(k1, a, b)
32865}
32866
32867/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k.
32868///
32869/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epi64_mask&expand=787)
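///
/// An illustrative sketch (not compiled as a doctest; assumes AVX-512F and the unstable
/// `stdarch_x86_avx512` feature):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
///     let b = _mm512_set1_epi64(3);
///     // Only lane 3 holds equal values, so only bit 3 of the mask is set.
///     assert_eq!(_mm512_cmpeq_epi64_mask(a, b), 0b0000_1000);
/// }
/// ```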
32870#[inline]
32871#[target_feature(enable = "avx512f")]
32872#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32873#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32874pub fn _mm512_cmpeq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    unsafe { simd_bitmask::<__m512i, _>(simd_eq(a.as_i64x8(), b.as_i64x8())) }
32876}
32877
32878/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32879///
32880/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epi64_mask&expand=788)
32881#[inline]
32882#[target_feature(enable = "avx512f")]
32883#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32884#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32885pub fn _mm512_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32886 _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_EQ>(k1, a, b)
32887}
32888
32889/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k.
32890///
32891/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi64_mask&expand=785)
32892#[inline]
32893#[target_feature(enable = "avx512f,avx512vl")]
32894#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32895#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32896pub fn _mm256_cmpeq_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<__m256i, _>(simd_eq(a.as_i64x4(), b.as_i64x4())) }
32898}
32899
32900/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32901///
32902/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epi64_mask&expand=786)
32903#[inline]
32904#[target_feature(enable = "avx512f,avx512vl")]
32905#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32906#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32907pub fn _mm256_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32908 _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_EQ>(k1, a, b)
32909}
32910
32911/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k.
32912///
32913/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi64_mask&expand=783)
32914#[inline]
32915#[target_feature(enable = "avx512f,avx512vl")]
32916#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32917#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32918pub fn _mm_cmpeq_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<__m128i, _>(simd_eq(a.as_i64x2(), b.as_i64x2())) }
32920}
32921
32922/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32923///
32924/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epi64_mask&expand=784)
32925#[inline]
32926#[target_feature(enable = "avx512f,avx512vl")]
32927#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32928#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32929pub fn _mm_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32930 _mm_mask_cmp_epi64_mask::<_MM_CMPINT_EQ>(k1, a, b)
32931}
32932
32933/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k.
32934///
32935/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epi64_mask&expand=1094)
32936#[inline]
32937#[target_feature(enable = "avx512f")]
32938#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32939#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32940pub fn _mm512_cmpneq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    unsafe { simd_bitmask::<__m512i, _>(simd_ne(a.as_i64x8(), b.as_i64x8())) }
32942}
32943
32944/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32945///
32946/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epi64_mask&expand=1095)
32947#[inline]
32948#[target_feature(enable = "avx512f")]
32949#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32950#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32951pub fn _mm512_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32952 _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_NE>(k1, a, b)
32953}
32954
32955/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k.
32956///
32957/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epi64_mask&expand=1092)
32958#[inline]
32959#[target_feature(enable = "avx512f,avx512vl")]
32960#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32961#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32962pub fn _mm256_cmpneq_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<__m256i, _>(simd_ne(a.as_i64x4(), b.as_i64x4())) }
32964}
32965
32966/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32967///
32968/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epi64_mask&expand=1093)
32969#[inline]
32970#[target_feature(enable = "avx512f,avx512vl")]
32971#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32972#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32973pub fn _mm256_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32974 _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_NE>(k1, a, b)
32975}
32976
32977/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k.
32978///
32979/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epi64_mask&expand=1090)
32980#[inline]
32981#[target_feature(enable = "avx512f,avx512vl")]
32982#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32983#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32984pub fn _mm_cmpneq_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<__m128i, _>(simd_ne(a.as_i64x2(), b.as_i64x2())) }
32986}
32987
32988/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32989///
32990/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epi64_mask&expand=1091)
32991#[inline]
32992#[target_feature(enable = "avx512f,avx512vl")]
32993#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32994#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32995pub fn _mm_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32996 _mm_mask_cmp_epi64_mask::<_MM_CMPINT_NE>(k1, a, b)
32997}
32998
32999/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
33000///
33001/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epi64_mask&expand=703)
33002#[inline]
33003#[target_feature(enable = "avx512f")]
33004#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33005#[rustc_legacy_const_generics(2)]
33006#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33007pub fn _mm512_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m512i, b: __m512i) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM3, 3);
        let a: i64x8 = a.as_i64x8();
        let b: i64x8 = b.as_i64x8();
        let r: i64x8 = match IMM3 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i64x8::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i64x8::splat(-1),
        };
        simd_bitmask(r)
    }
}
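
// Illustrative sketch (an assumption for exposition, not part of the crate's
// test suite): shows how the IMM3 predicate of `_mm512_cmp_epi64_mask`
// selects the comparison, with hand-picked inputs.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_cmp_epi64_mask_predicates() {
    let a = _mm512_set1_epi64(2);
    let b = _mm512_setr_epi64(1, 2, 3, 4, 1, 2, 3, 4);
    // _MM_CMPINT_LT (1): lanes where a < b, i.e. lanes 2, 3, 6 and 7.
    assert_eq!(_mm512_cmp_epi64_mask::<_MM_CMPINT_LT>(a, b), 0b1100_1100);
    // _MM_CMPINT_NE (4): every lane except lanes 1 and 5, where b == 2.
    assert_eq!(_mm512_cmp_epi64_mask::<_MM_CMPINT_NE>(a, b), 0b1101_1101);
    // _MM_CMPINT_FALSE (3) always yields an empty mask.
    assert_eq!(_mm512_cmp_epi64_mask::<_MM_CMPINT_FALSE>(a, b), 0);
}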
33025
33026/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33027///
33028/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epi64_mask&expand=704)
33029#[inline]
33030#[target_feature(enable = "avx512f")]
33031#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33032#[rustc_legacy_const_generics(3)]
33033#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33034pub fn _mm512_mask_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
33035 k1: __mmask8,
33036 a: __m512i,
33037 b: __m512i,
33038) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM3, 3);
        let a: i64x8 = a.as_i64x8();
        let b: i64x8 = b.as_i64x8();
        let k1: i64x8 = simd_select_bitmask(k1, i64x8::splat(-1), i64x8::ZERO);
        let r: i64x8 = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i64x8::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        simd_bitmask(r)
    }
}
33057
33058/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
33059///
33060/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epi64_mask&expand=701)
33061#[inline]
33062#[target_feature(enable = "avx512f,avx512vl")]
33063#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33064#[rustc_legacy_const_generics(2)]
33065#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33066pub fn _mm256_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM3, 3);
        let a: i64x4 = a.as_i64x4();
        let b: i64x4 = b.as_i64x4();
        let r: i64x4 = match IMM3 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i64x4::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i64x4::splat(-1),
        };
        simd_bitmask(r)
    }
}
33084
33085/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33086///
33087/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epi64_mask&expand=702)
33088#[inline]
33089#[target_feature(enable = "avx512f,avx512vl")]
33090#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33091#[rustc_legacy_const_generics(3)]
33092#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33093pub fn _mm256_mask_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
33094 k1: __mmask8,
33095 a: __m256i,
33096 b: __m256i,
33097) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM3, 3);
        let a: i64x4 = a.as_i64x4();
        let b: i64x4 = b.as_i64x4();
        let k1: i64x4 = simd_select_bitmask(k1, i64x4::splat(-1), i64x4::ZERO);
        let r: i64x4 = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i64x4::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        simd_bitmask(r)
    }
}
33116
33117/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
33118///
33119/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epi64_mask&expand=699)
33120#[inline]
33121#[target_feature(enable = "avx512f,avx512vl")]
33122#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33123#[rustc_legacy_const_generics(2)]
33124#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33125pub fn _mm_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM3, 3);
        let a: i64x2 = a.as_i64x2();
        let b: i64x2 = b.as_i64x2();
        let r: i64x2 = match IMM3 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i64x2::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i64x2::splat(-1),
        };
        simd_bitmask(r)
    }
}
33143
33144/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33145///
33146/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epi64_mask&expand=700)
33147#[inline]
33148#[target_feature(enable = "avx512f,avx512vl")]
33149#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33150#[rustc_legacy_const_generics(3)]
33151#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33152pub fn _mm_mask_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
33153 k1: __mmask8,
33154 a: __m128i,
33155 b: __m128i,
33156) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM3, 3);
        let a: i64x2 = a.as_i64x2();
        let b: i64x2 = b.as_i64x2();
        let k1: i64x2 = simd_select_bitmask(k1, i64x2::splat(-1), i64x2::ZERO);
        let r: i64x2 = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i64x2::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        simd_bitmask(r)
    }
}
33175
33176/// Reduce the packed 32-bit integers in a by addition. Returns the sum of all elements in a.
33177///
33178/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_epi32&expand=4556)
33179#[inline]
33180#[target_feature(enable = "avx512f")]
33181#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33182pub fn _mm512_reduce_add_epi32(a: __m512i) -> i32 {
33183 unsafe { simd_reduce_add_unordered(a.as_i32x16()) }
33184}
33185
33186/// Reduce the packed 32-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
33187///
33188/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_epi32&expand=4555)
33189#[inline]
33190#[target_feature(enable = "avx512f")]
33191#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33192pub fn _mm512_mask_reduce_add_epi32(k: __mmask16, a: __m512i) -> i32 {
    unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i32x16(), i32x16::ZERO)) }
}
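
// Illustrative sketch (assumption, not part of the crate's test suite):
// lanes masked off by `k` contribute the additive identity 0, so only the
// active lanes are summed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_mask_reduce_add_epi32() {
    let a = _mm512_set1_epi32(3);
    // Only the low four lanes are active: 4 * 3 = 12.
    assert_eq!(_mm512_mask_reduce_add_epi32(0b0000_0000_0000_1111, a), 12);
    // An all-zero mask leaves nothing to sum, so the result is 0.
    assert_eq!(_mm512_mask_reduce_add_epi32(0, a), 0);
}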
33195
33196/// Reduce the packed 64-bit integers in a by addition. Returns the sum of all elements in a.
33197///
33198/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_epi64&expand=4558)
33199#[inline]
33200#[target_feature(enable = "avx512f")]
33201#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33202pub fn _mm512_reduce_add_epi64(a: __m512i) -> i64 {
33203 unsafe { simd_reduce_add_unordered(a.as_i64x8()) }
33204}
33205
33206/// Reduce the packed 64-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
33207///
33208/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_epi64&expand=4557)
33209#[inline]
33210#[target_feature(enable = "avx512f")]
33211#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33212pub fn _mm512_mask_reduce_add_epi64(k: __mmask8, a: __m512i) -> i64 {
    unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i64x8(), i64x8::ZERO)) }
33214}
33215
33216/// Reduce the packed single-precision (32-bit) floating-point elements in a by addition. Returns the sum of all elements in a.
33217///
33218/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_ps&expand=4562)
33219#[inline]
33220#[target_feature(enable = "avx512f")]
33221#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33222pub fn _mm512_reduce_add_ps(a: __m512) -> f32 {
    unsafe {
        // we have to use `simd_shuffle` here because `_mm512_extractf32x8_ps` is in AVX512DQ
        let a: __m256 = _mm256_add_ps(
            simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
            simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
        );
        let a: __m128 = _mm_add_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a));
        let a: __m128 = _mm_add_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1]));
        simd_extract::<_, f32>(a, 0) + simd_extract::<_, f32>(a, 1)
    }
}
33234
33235/// Reduce the packed single-precision (32-bit) floating-point elements in a by addition using mask k. Returns the sum of all active elements in a.
33236///
33237/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_ps&expand=4561)
33238#[inline]
33239#[target_feature(enable = "avx512f")]
33240#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33241pub fn _mm512_mask_reduce_add_ps(k: __mmask16, a: __m512) -> f32 {
    unsafe { _mm512_reduce_add_ps(simd_select_bitmask(k, a, _mm512_setzero_ps())) }
33243}
33244
33245/// Reduce the packed double-precision (64-bit) floating-point elements in a by addition. Returns the sum of all elements in a.
33246///
33247/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_pd&expand=4560)
33248#[inline]
33249#[target_feature(enable = "avx512f")]
33250#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33251pub fn _mm512_reduce_add_pd(a: __m512d) -> f64 {
    unsafe {
        let a: __m256d = _mm256_add_pd(
            _mm512_extractf64x4_pd::<0>(a),
            _mm512_extractf64x4_pd::<1>(a),
        );
        let a: __m128d = _mm_add_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a));
        simd_extract::<_, f64>(a, 0) + simd_extract::<_, f64>(a, 1)
    }
}
33261
33262/// Reduce the packed double-precision (64-bit) floating-point elements in a by addition using mask k. Returns the sum of all active elements in a.
33263///
33264/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_pd&expand=4559)
33265#[inline]
33266#[target_feature(enable = "avx512f")]
33267#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33268pub fn _mm512_mask_reduce_add_pd(k: __mmask8, a: __m512d) -> f64 {
    unsafe { _mm512_reduce_add_pd(simd_select_bitmask(k, a, _mm512_setzero_pd())) }
33270}
33271
33272/// Reduce the packed 32-bit integers in a by multiplication. Returns the product of all elements in a.
33273///
33274/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_epi32&expand=4600)
33275#[inline]
33276#[target_feature(enable = "avx512f")]
33277#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33278pub fn _mm512_reduce_mul_epi32(a: __m512i) -> i32 {
33279 unsafe { simd_reduce_mul_unordered(a.as_i32x16()) }
33280}
33281
33282/// Reduce the packed 32-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
33283///
33284/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_epi32&expand=4599)
33285#[inline]
33286#[target_feature(enable = "avx512f")]
33287#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33288pub fn _mm512_mask_reduce_mul_epi32(k: __mmask16, a: __m512i) -> i32 {
    unsafe {
        simd_reduce_mul_unordered(simd_select_bitmask(
            k,
            a.as_i32x16(),
            _mm512_set1_epi32(1).as_i32x16(),
        ))
    }
}
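
// Illustrative sketch (assumption, not part of the crate's test suite): for a
// masked product the inactive lanes are replaced with the multiplicative
// identity 1 rather than 0, so masking lanes off never zeroes the result.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_mask_reduce_mul_epi32() {
    let a = _mm512_set1_epi32(2);
    // Three active lanes: 2 * 2 * 2 = 8; the other thirteen lanes act as 1.
    assert_eq!(_mm512_mask_reduce_mul_epi32(0b0000_0000_0000_0111, a), 8);
}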
33297
33298/// Reduce the packed 64-bit integers in a by multiplication. Returns the product of all elements in a.
33299///
33300/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_epi64&expand=4602)
33301#[inline]
33302#[target_feature(enable = "avx512f")]
33303#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33304pub fn _mm512_reduce_mul_epi64(a: __m512i) -> i64 {
33305 unsafe { simd_reduce_mul_unordered(a.as_i64x8()) }
33306}
33307
33308/// Reduce the packed 64-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
33309///
33310/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_epi64&expand=4601)
33311#[inline]
33312#[target_feature(enable = "avx512f")]
33313#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33314pub fn _mm512_mask_reduce_mul_epi64(k: __mmask8, a: __m512i) -> i64 {
    unsafe {
        simd_reduce_mul_unordered(simd_select_bitmask(
            k,
            a.as_i64x8(),
            _mm512_set1_epi64(1).as_i64x8(),
        ))
    }
}
33323
33324/// Reduce the packed single-precision (32-bit) floating-point elements in a by multiplication. Returns the product of all elements in a.
33325///
33326/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_ps&expand=4606)
33327#[inline]
33328#[target_feature(enable = "avx512f")]
33329#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33330pub fn _mm512_reduce_mul_ps(a: __m512) -> f32 {
    unsafe {
        // we have to use `simd_shuffle` here because `_mm512_extractf32x8_ps` is in AVX512DQ
        let a: __m256 = _mm256_mul_ps(
            simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
            simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
        );
        let a: __m128 = _mm_mul_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a));
        let a: __m128 = _mm_mul_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1]));
        simd_extract::<_, f32>(a, 0) * simd_extract::<_, f32>(a, 1)
    }
}
33342
33343/// Reduce the packed single-precision (32-bit) floating-point elements in a by multiplication using mask k. Returns the product of all active elements in a.
33344///
33345/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_ps&expand=4605)
33346#[inline]
33347#[target_feature(enable = "avx512f")]
33348#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33349pub fn _mm512_mask_reduce_mul_ps(k: __mmask16, a: __m512) -> f32 {
    unsafe { _mm512_reduce_mul_ps(simd_select_bitmask(k, a, _mm512_set1_ps(1.))) }
33351}
33352
33353/// Reduce the packed double-precision (64-bit) floating-point elements in a by multiplication. Returns the product of all elements in a.
33354///
33355/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_pd&expand=4604)
33356#[inline]
33357#[target_feature(enable = "avx512f")]
33358#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33359pub fn _mm512_reduce_mul_pd(a: __m512d) -> f64 {
    unsafe {
        let a: __m256d = _mm256_mul_pd(
            _mm512_extractf64x4_pd::<0>(a),
            _mm512_extractf64x4_pd::<1>(a),
        );
        let a: __m128d = _mm_mul_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a));
        simd_extract::<_, f64>(a, 0) * simd_extract::<_, f64>(a, 1)
    }
}
33369
33370/// Reduce the packed double-precision (64-bit) floating-point elements in a by multiplication using mask k. Returns the product of all active elements in a.
33371///
33372/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_pd&expand=4603)
33373#[inline]
33374#[target_feature(enable = "avx512f")]
33375#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33376pub fn _mm512_mask_reduce_mul_pd(k: __mmask8, a: __m512d) -> f64 {
    unsafe { _mm512_reduce_mul_pd(simd_select_bitmask(k, a, _mm512_set1_pd(1.))) }
33378}
33379
33380/// Reduce the packed signed 32-bit integers in a by maximum. Returns the maximum of all elements in a.
33381///
33382/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epi32&expand=4576)
33383#[inline]
33384#[target_feature(enable = "avx512f")]
33385#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33386pub fn _mm512_reduce_max_epi32(a: __m512i) -> i32 {
33387 unsafe { simd_reduce_max(a.as_i32x16()) }
33388}
33389
33390/// Reduce the packed signed 32-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
33391///
33392/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_epi32&expand=4575)
33393#[inline]
33394#[target_feature(enable = "avx512f")]
33395#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33396pub fn _mm512_mask_reduce_max_epi32(k: __mmask16, a: __m512i) -> i32 {
    unsafe {
        simd_reduce_max(simd_select_bitmask(
            k,
            a.as_i32x16(),
            i32x16::splat(i32::MIN),
        ))
    }
}
33405
33406/// Reduce the packed signed 64-bit integers in a by maximum. Returns the maximum of all elements in a.
33407///
33408/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epi64&expand=4578)
33409#[inline]
33410#[target_feature(enable = "avx512f")]
33411#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33412pub fn _mm512_reduce_max_epi64(a: __m512i) -> i64 {
33413 unsafe { simd_reduce_max(a.as_i64x8()) }
33414}
33415
33416/// Reduce the packed signed 64-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
33417///
33418/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_epi64&expand=4577)
33419#[inline]
33420#[target_feature(enable = "avx512f")]
33421#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33422pub fn _mm512_mask_reduce_max_epi64(k: __mmask8, a: __m512i) -> i64 {
    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_i64x8(), i64x8::splat(i64::MIN))) }
}
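
// Illustrative sketch (assumption, not part of the crate's test suite): the
// masked maximum seeds inactive lanes with `i64::MIN`, so an all-zero mask
// returns `i64::MIN` rather than 0.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_mask_reduce_max_epi64() {
    let a = _mm512_setr_epi64(-5, -4, -3, -2, -1, 1, 2, 3);
    // Only the negative lanes are active, so the maximum among them is -1.
    assert_eq!(_mm512_mask_reduce_max_epi64(0b0001_1111, a), -1);
    // No active lanes: the identity element itself is returned.
    assert_eq!(_mm512_mask_reduce_max_epi64(0, a), i64::MIN);
}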
33425
33426/// Reduce the packed unsigned 32-bit integers in a by maximum. Returns the maximum of all elements in a.
33427///
33428/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epu32&expand=4580)
33429#[inline]
33430#[target_feature(enable = "avx512f")]
33431#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33432pub fn _mm512_reduce_max_epu32(a: __m512i) -> u32 {
33433 unsafe { simd_reduce_max(a.as_u32x16()) }
33434}
33435
33436/// Reduce the packed unsigned 32-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
33437///
33438/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_epu32&expand=4579)
33439#[inline]
33440#[target_feature(enable = "avx512f")]
33441#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33442pub fn _mm512_mask_reduce_max_epu32(k: __mmask16, a: __m512i) -> u32 {
    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u32x16(), u32x16::ZERO)) }
33444}
33445
33446/// Reduce the packed unsigned 64-bit integers in a by maximum. Returns the maximum of all elements in a.
33447///
33448/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epu64&expand=4582)
33449#[inline]
33450#[target_feature(enable = "avx512f")]
33451#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33452pub fn _mm512_reduce_max_epu64(a: __m512i) -> u64 {
33453 unsafe { simd_reduce_max(a.as_u64x8()) }
33454}
33455
33456/// Reduce the packed unsigned 64-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
33457///
33458/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_epu64&expand=4581)
33459#[inline]
33460#[target_feature(enable = "avx512f")]
33461#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33462pub fn _mm512_mask_reduce_max_epu64(k: __mmask8, a: __m512i) -> u64 {
    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u64x8(), u64x8::ZERO)) }
33464}
33465
33466/// Reduce the packed single-precision (32-bit) floating-point elements in a by maximum. Returns the maximum of all elements in a.
33467///
33468/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_ps&expand=4586)
33469#[inline]
33470#[target_feature(enable = "avx512f")]
33471#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33472pub fn _mm512_reduce_max_ps(a: __m512) -> f32 {
    unsafe {
        let a: __m256 = _mm256_max_ps(
            simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
            simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
        );
        let a: __m128 = _mm_max_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a));
        let a: __m128 = _mm_max_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1]));
        _mm_cvtss_f32(_mm_max_ss(a, _mm_movehdup_ps(a)))
    }
}
33483
33484/// Reduce the packed single-precision (32-bit) floating-point elements in a by maximum using mask k. Returns the maximum of all active elements in a.
33485///
33486/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_ps&expand=4585)
33487#[inline]
33488#[target_feature(enable = "avx512f")]
33489#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33490pub fn _mm512_mask_reduce_max_ps(k: __mmask16, a: __m512) -> f32 {
    _mm512_reduce_max_ps(_mm512_mask_mov_ps(_mm512_set1_ps(f32::MIN), k, a))
33492}
33493
33494/// Reduce the packed double-precision (64-bit) floating-point elements in a by maximum. Returns the maximum of all elements in a.
33495///
33496/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_pd&expand=4584)
33497#[inline]
33498#[target_feature(enable = "avx512f")]
33499#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33500pub fn _mm512_reduce_max_pd(a: __m512d) -> f64 {
    unsafe {
        let a: __m256d = _mm256_max_pd(
            _mm512_extractf64x4_pd::<0>(a),
            _mm512_extractf64x4_pd::<1>(a),
        );
        let a: __m128d = _mm_max_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a));
        _mm_cvtsd_f64(_mm_max_sd(a, simd_shuffle!(a, a, [1, 0])))
    }
}
33510
33511/// Reduce the packed double-precision (64-bit) floating-point elements in a by maximum using mask k. Returns the maximum of all active elements in a.
33512///
33513/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_pd&expand=4583)
33514#[inline]
33515#[target_feature(enable = "avx512f")]
33516#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33517pub fn _mm512_mask_reduce_max_pd(k: __mmask8, a: __m512d) -> f64 {
    _mm512_reduce_max_pd(_mm512_mask_mov_pd(_mm512_set1_pd(f64::MIN), k, a))
33519}
33520
33521/// Reduce the packed signed 32-bit integers in a by minimum. Returns the minimum of all elements in a.
33522///
33523/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_epi32&expand=4588)
33524#[inline]
33525#[target_feature(enable = "avx512f")]
33526#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33527pub fn _mm512_reduce_min_epi32(a: __m512i) -> i32 {
33528 unsafe { simd_reduce_min(a.as_i32x16()) }
33529}
33530
/// Reduce the packed signed 32-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
33532///
33533/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_epi32&expand=4587)
33534#[inline]
33535#[target_feature(enable = "avx512f")]
33536#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33537pub fn _mm512_mask_reduce_min_epi32(k: __mmask16, a: __m512i) -> i32 {
    unsafe {
        simd_reduce_min(simd_select_bitmask(
            k,
            a.as_i32x16(),
            i32x16::splat(i32::MAX),
        ))
    }
}
33546
33547/// Reduce the packed signed 64-bit integers in a by minimum. Returns the minimum of all elements in a.
33548///
33549/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_epi64&expand=4590)
33550#[inline]
33551#[target_feature(enable = "avx512f")]
33552#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33553pub fn _mm512_reduce_min_epi64(a: __m512i) -> i64 {
33554 unsafe { simd_reduce_min(a.as_i64x8()) }
33555}
33556
/// Reduce the packed signed 64-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
33558///
33559/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_epi64&expand=4589)
33560#[inline]
33561#[target_feature(enable = "avx512f")]
33562#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33563pub fn _mm512_mask_reduce_min_epi64(k: __mmask8, a: __m512i) -> i64 {
    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_i64x8(), i64x8::splat(i64::MAX))) }
33565}
33566
33567/// Reduce the packed unsigned 32-bit integers in a by minimum. Returns the minimum of all elements in a.
33568///
33569/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_epu32&expand=4592)
33570#[inline]
33571#[target_feature(enable = "avx512f")]
33572#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33573pub fn _mm512_reduce_min_epu32(a: __m512i) -> u32 {
33574 unsafe { simd_reduce_min(a.as_u32x16()) }
33575}
33576
/// Reduce the packed unsigned 32-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
33578///
33579/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_epu32&expand=4591)
33580#[inline]
33581#[target_feature(enable = "avx512f")]
33582#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33583pub fn _mm512_mask_reduce_min_epu32(k: __mmask16, a: __m512i) -> u32 {
    unsafe {
        simd_reduce_min(simd_select_bitmask(
            k,
            a.as_u32x16(),
            u32x16::splat(u32::MAX),
        ))
    }
}
33592
33593/// Reduce the packed unsigned 64-bit integers in a by minimum. Returns the minimum of all elements in a.
33594///
33595/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_epu64&expand=4594)
33596#[inline]
33597#[target_feature(enable = "avx512f")]
33598#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33599pub fn _mm512_reduce_min_epu64(a: __m512i) -> u64 {
33600 unsafe { simd_reduce_min(a.as_u64x8()) }
33601}
33602
/// Reduce the packed unsigned 64-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
33604///
33605/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_epu64&expand=4589)
33606#[inline]
33607#[target_feature(enable = "avx512f")]
33608#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33609pub fn _mm512_mask_reduce_min_epu64(k: __mmask8, a: __m512i) -> u64 {
    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_u64x8(), u64x8::splat(u64::MAX))) }
33611}
33612
33613/// Reduce the packed single-precision (32-bit) floating-point elements in a by minimum. Returns the minimum of all elements in a.
33614///
33615/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_ps&expand=4598)
33616#[inline]
33617#[target_feature(enable = "avx512f")]
33618#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33619pub fn _mm512_reduce_min_ps(a: __m512) -> f32 {
    unsafe {
        let a: __m256 = _mm256_min_ps(
            simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
            simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
        );
        let a: __m128 = _mm_min_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a));
        let a: __m128 = _mm_min_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1]));
        _mm_cvtss_f32(_mm_min_ss(a, _mm_movehdup_ps(a)))
    }
}
33630
/// Reduce the packed single-precision (32-bit) floating-point elements in a by minimum using mask k. Returns the minimum of all active elements in a.
33632///
33633/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_ps&expand=4597)
33634#[inline]
33635#[target_feature(enable = "avx512f")]
33636#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33637pub fn _mm512_mask_reduce_min_ps(k: __mmask16, a: __m512) -> f32 {
    _mm512_reduce_min_ps(_mm512_mask_mov_ps(_mm512_set1_ps(f32::MAX), k, a))
33639}
33640
33641/// Reduce the packed double-precision (64-bit) floating-point elements in a by minimum. Returns the minimum of all elements in a.
33642///
33643/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_pd&expand=4596)
33644#[inline]
33645#[target_feature(enable = "avx512f")]
33646#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33647pub fn _mm512_reduce_min_pd(a: __m512d) -> f64 {
    unsafe {
        let a: __m256d = _mm256_min_pd(
            _mm512_extractf64x4_pd::<0>(a),
            _mm512_extractf64x4_pd::<1>(a),
        );
        let a: __m128d = _mm_min_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a));
        _mm_cvtsd_f64(_mm_min_sd(a, simd_shuffle!(a, a, [1, 0])))
    }
}
33657
/// Reduce the packed double-precision (64-bit) floating-point elements in a by minimum using mask k. Returns the minimum of all active elements in a.
33659///
33660/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_pd&expand=4595)
33661#[inline]
33662#[target_feature(enable = "avx512f")]
33663#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33664pub fn _mm512_mask_reduce_min_pd(k: __mmask8, a: __m512d) -> f64 {
    _mm512_reduce_min_pd(_mm512_mask_mov_pd(_mm512_set1_pd(f64::MAX), k, a))
33666}
33667
33668/// Reduce the packed 32-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
33669///
33670/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_and_epi32&expand=4564)
33671#[inline]
33672#[target_feature(enable = "avx512f")]
33673#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33674pub fn _mm512_reduce_and_epi32(a: __m512i) -> i32 {
33675 unsafe { simd_reduce_and(a.as_i32x16()) }
33676}
33677
33678/// Reduce the packed 32-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
33679///
33680/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_and_epi32&expand=4563)
33681#[inline]
33682#[target_feature(enable = "avx512f")]
33683#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33684pub fn _mm512_mask_reduce_and_epi32(k: __mmask16, a: __m512i) -> i32 {
    unsafe { simd_reduce_and(simd_select_bitmask(k, a.as_i32x16(), i32x16::splat(-1))) }
33686}
33687
33688/// Reduce the packed 64-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
33689///
33690/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_and_epi64&expand=4566)
33691#[inline]
33692#[target_feature(enable = "avx512f")]
33693#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33694pub fn _mm512_reduce_and_epi64(a: __m512i) -> i64 {
33695 unsafe { simd_reduce_and(a.as_i64x8()) }
33696}
33697
/// Reduce the packed 64-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
33699///
33700/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_and_epi64&expand=4557)
33701#[inline]
33702#[target_feature(enable = "avx512f")]
33703#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33704pub fn _mm512_mask_reduce_and_epi64(k: __mmask8, a: __m512i) -> i64 {
    unsafe { simd_reduce_and(simd_select_bitmask(k, a.as_i64x8(), i64x8::splat(-1))) }
33706}
33707
33708/// Reduce the packed 32-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
33709///
33710/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_or_epi32&expand=4608)
33711#[inline]
33712#[target_feature(enable = "avx512f")]
33713#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33714pub fn _mm512_reduce_or_epi32(a: __m512i) -> i32 {
33715 unsafe { simd_reduce_or(a.as_i32x16()) }
33716}
33717
33718/// Reduce the packed 32-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
33719///
33720/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_or_epi32&expand=4607)
33721#[inline]
33722#[target_feature(enable = "avx512f")]
33723#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33724pub fn _mm512_mask_reduce_or_epi32(k: __mmask16, a: __m512i) -> i32 {
    unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i32x16(), i32x16::ZERO)) }
33726}
33727
33728/// Reduce the packed 64-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
33729///
33730/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_or_epi64&expand=4610)
33731#[inline]
33732#[target_feature(enable = "avx512f")]
33733#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33734pub fn _mm512_reduce_or_epi64(a: __m512i) -> i64 {
33735 unsafe { simd_reduce_or(a.as_i64x8()) }
33736}
33737
33738/// Reduce the packed 64-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
33739///
33740/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_or_epi64&expand=4609)
33741#[inline]
33742#[target_feature(enable = "avx512f")]
33743#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33744pub fn _mm512_mask_reduce_or_epi64(k: __mmask8, a: __m512i) -> i64 {
    unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i64x8(), i64x8::ZERO)) }
}
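
// Illustrative sketch (assumption, not part of the crate's test suite): the
// masked bitwise reductions use the identity of their operator, all-ones for
// AND and all-zeros for OR, so masked-off lanes never affect the result.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_mask_reduce_and_or_epi64() {
    let a = _mm512_setr_epi64(0b1100, 0b1010, 0, 0, 0, 0, 0, 0);
    // AND of the two active lanes: 0b1100 & 0b1010 = 0b1000.
    assert_eq!(_mm512_mask_reduce_and_epi64(0b0000_0011, a), 0b1000);
    // OR of the two active lanes: 0b1100 | 0b1010 = 0b1110.
    assert_eq!(_mm512_mask_reduce_or_epi64(0b0000_0011, a), 0b1110);
}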
33747
33748/// Returns vector of type `__m512d` with indeterminate elements.
33749/// Despite being "undefined", this is some valid value and not equivalent to [`mem::MaybeUninit`].
33750/// In practice, this is equivalent to [`mem::zeroed`].
33751///
33752/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_undefined_pd)
33753#[inline]
33754#[target_feature(enable = "avx512f")]
33755#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33756// This intrinsic has no corresponding instruction.
33757pub fn _mm512_undefined_pd() -> __m512d {
33758 unsafe { const { mem::zeroed() } }
33759}
33760
33761/// Returns vector of type `__m512` with indeterminate elements.
33762/// Despite being "undefined", this is some valid value and not equivalent to [`mem::MaybeUninit`].
33763/// In practice, this is equivalent to [`mem::zeroed`].
33764///
33765/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_undefined_ps)
33766#[inline]
33767#[target_feature(enable = "avx512f")]
33768#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33769// This intrinsic has no corresponding instruction.
33770pub fn _mm512_undefined_ps() -> __m512 {
33771 unsafe { const { mem::zeroed() } }
33772}
33773
33774/// Return vector of type __m512i with indeterminate elements.
33775/// Despite being "undefined", this is some valid value and not equivalent to [`mem::MaybeUninit`].
33776/// In practice, this is equivalent to [`mem::zeroed`].
33777///
33778/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_undefined_epi32&expand=5995)
33779#[inline]
33780#[target_feature(enable = "avx512f")]
33781#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33782// This intrinsic has no corresponding instruction.
33783pub fn _mm512_undefined_epi32() -> __m512i {
33784 unsafe { const { mem::zeroed() } }
33785}
33786
33787/// Return vector of type __m512 with indeterminate elements.
33788/// Despite being "undefined", this is some valid value and not equivalent to [`mem::MaybeUninit`].
33789/// In practice, this is equivalent to [`mem::zeroed`].
33790///
33791/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_undefined&expand=5994)
33792#[inline]
33793#[target_feature(enable = "avx512f")]
33794#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33795// This intrinsic has no corresponding instruction.
33796pub fn _mm512_undefined() -> __m512 {
33797 unsafe { const { mem::zeroed() } }
33798}
33799
33800/// Load 512-bits (composed of 16 packed 32-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
33801///
33802/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_epi32&expand=3377)
33803#[inline]
33804#[target_feature(enable = "avx512f")]
33805#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33806#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
33807pub unsafe fn _mm512_loadu_epi32(mem_addr: *const i32) -> __m512i {
    ptr::read_unaligned(mem_addr as *const __m512i)
33809}
33810
33811/// Load 256-bits (composed of 8 packed 32-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
33812///
33813/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_epi32&expand=3374)
33814#[inline]
33815#[target_feature(enable = "avx512f,avx512vl")]
33816#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33817#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
33818pub unsafe fn _mm256_loadu_epi32(mem_addr: *const i32) -> __m256i {
    ptr::read_unaligned(mem_addr as *const __m256i)
33820}
33821
33822/// Load 128-bits (composed of 4 packed 32-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
33823///
33824/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_epi32&expand=3371)
33825#[inline]
33826#[target_feature(enable = "avx512f,avx512vl")]
33827#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33828#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
33829pub unsafe fn _mm_loadu_epi32(mem_addr: *const i32) -> __m128i {
    ptr::read_unaligned(mem_addr as *const __m128i)
}
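
// Illustrative sketch (assumption, not part of the crate's test suite): the
// `loadu` family accepts any address, so reading from an arbitrary offset
// into a slice is fine as long as enough elements are readable.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
fn example_loadu_epi32() {
    let data: [i32; 20] = [
        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
    ];
    unsafe {
        // 512-bit load of elements 1..17; the pointer is deliberately
        // misaligned with respect to a 64-byte boundary.
        let v = _mm512_loadu_epi32(data.as_ptr().add(1));
        assert_eq!(_mm512_reduce_add_epi32(v), 136); // 1 + 2 + ... + 16
        // The narrower variants follow the same contract.
        let _ = _mm256_loadu_epi32(data.as_ptr().add(3));
        let _ = _mm_loadu_epi32(data.as_ptr().add(5));
    }
}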
33832
33833/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33834///
33835/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_storeu_epi16&expand=1460)
33836#[inline]
33837#[target_feature(enable = "avx512f")]
33838#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33839#[cfg_attr(test, assert_instr(vpmovdw))]
33840pub unsafe fn _mm512_mask_cvtepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
    vpmovdwmem(mem_addr, a.as_i32x16(), k);
33842}
33843
33844/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33845///
33846/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_storeu_epi16&expand=1462)
33847#[inline]
33848#[target_feature(enable = "avx512f,avx512vl")]
33849#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33850#[cfg_attr(test, assert_instr(vpmovdw))]
33851pub unsafe fn _mm256_mask_cvtepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovdwmem256(mem_addr, a.as_i32x8(), k);
33853}
33854
33855/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33856///
33857/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_storeu_epi16&expand=1461)
33858#[inline]
33859#[target_feature(enable = "avx512f,avx512vl")]
33860#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33861#[cfg_attr(test, assert_instr(vpmovdw))]
33862pub unsafe fn _mm_mask_cvtepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovdwmem128(mem_addr, a.as_i32x4(), k);
}
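
// Illustrative sketch (assumption, not part of the crate's test suite): the
// masked narrowing stores truncate each active 32-bit lane to 16 bits and
// leave the destination untouched where the mask bit is clear.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_mask_cvtepi32_storeu_epi16() {
    let a = _mm512_set1_epi32(0x0001_0002);
    let mut out = [-1i16; 16];
    unsafe {
        // Only the low eight lanes are written; truncation keeps 0x0002.
        _mm512_mask_cvtepi32_storeu_epi16(out.as_mut_ptr() as *mut i8, 0x00FF, a);
    }
    assert_eq!(out[0], 0x0002);
    assert_eq!(out[15], -1);
}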
33865
33866/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33867///
33868/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi32_storeu_epi16&expand=1833)
33869#[inline]
33870#[target_feature(enable = "avx512f")]
33871#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33872#[cfg_attr(test, assert_instr(vpmovsdw))]
33873pub unsafe fn _mm512_mask_cvtsepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
    vpmovsdwmem(mem_addr, a.as_i32x16(), k);
33875}
33876
33877/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33878///
33879/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi32_storeu_epi16&expand=1832)
33880#[inline]
33881#[target_feature(enable = "avx512f,avx512vl")]
33882#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33883#[cfg_attr(test, assert_instr(vpmovsdw))]
33884pub unsafe fn _mm256_mask_cvtsepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovsdwmem256(mem_addr, a.as_i32x8(), k);
33886}
33887
33888/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33889///
33890/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi32_storeu_epi16&expand=1831)
33891#[inline]
33892#[target_feature(enable = "avx512f,avx512vl")]
33893#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33894#[cfg_attr(test, assert_instr(vpmovsdw))]
33895pub unsafe fn _mm_mask_cvtsepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovsdwmem128(mem_addr, a.as_i32x4(), k);
33897}
33898
33899/// Convert packed unsigned 32-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33900///
33901/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi32_storeu_epi16&expand=2068)
33902#[inline]
33903#[target_feature(enable = "avx512f")]
33904#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33905#[cfg_attr(test, assert_instr(vpmovusdw))]
33906pub unsafe fn _mm512_mask_cvtusepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
    vpmovusdwmem(mem_addr, a.as_i32x16(), k);
33908}
33909
33910/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33911///
33912/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi32_storeu_epi16&expand=2067)
33913#[inline]
33914#[target_feature(enable = "avx512f,avx512vl")]
33915#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33916#[cfg_attr(test, assert_instr(vpmovusdw))]
33917pub unsafe fn _mm256_mask_cvtusepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovusdwmem256(mem_addr, a.as_i32x8(), k);
33919}
33920
33921/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33922///
33923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi32_storeu_epi16&expand=2066)
33924#[inline]
33925#[target_feature(enable = "avx512f,avx512vl")]
33926#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33927#[cfg_attr(test, assert_instr(vpmovusdw))]
33928pub unsafe fn _mm_mask_cvtusepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovusdwmem128(mem_addr, a.as_i32x4(), k);
33930}
33931
33932/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33933///
33934/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_storeu_epi8&expand=1463)
33935#[inline]
33936#[target_feature(enable = "avx512f")]
33937#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33938#[cfg_attr(test, assert_instr(vpmovdb))]
33939pub unsafe fn _mm512_mask_cvtepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
    vpmovdbmem(mem_addr, a.as_i32x16(), k);
33941}
33942
33943/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33944///
33945/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_storeu_epi8&expand=1462)
33946#[inline]
33947#[target_feature(enable = "avx512f,avx512vl")]
33948#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33949#[cfg_attr(test, assert_instr(vpmovdb))]
33950pub unsafe fn _mm256_mask_cvtepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovdbmem256(mem_addr, a.as_i32x8(), k);
33952}
33953
33954/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33955///
33956/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_storeu_epi8&expand=1461)
33957#[inline]
33958#[target_feature(enable = "avx512f,avx512vl")]
33959#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33960#[cfg_attr(test, assert_instr(vpmovdb))]
33961pub unsafe fn _mm_mask_cvtepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovdbmem128(mem_addr, a.as_i32x4(), k);
33963}
33964
33965/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33966///
33967/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi32_storeu_epi8&expand=1836)
33968#[inline]
33969#[target_feature(enable = "avx512f")]
33970#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33971#[cfg_attr(test, assert_instr(vpmovsdb))]
33972pub unsafe fn _mm512_mask_cvtsepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
    vpmovsdbmem(mem_addr, a.as_i32x16(), k);
33974}
33975
33976/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33977///
33978/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi32_storeu_epi8&expand=1835)
33979#[inline]
33980#[target_feature(enable = "avx512f,avx512vl")]
33981#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33982#[cfg_attr(test, assert_instr(vpmovsdb))]
33983pub unsafe fn _mm256_mask_cvtsepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovsdbmem256(mem_addr, a.as_i32x8(), k);
33985}
33986
33987/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33988///
33989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi32_storeu_epi8&expand=1834)
33990#[inline]
33991#[target_feature(enable = "avx512f,avx512vl")]
33992#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33993#[cfg_attr(test, assert_instr(vpmovsdb))]
33994pub unsafe fn _mm_mask_cvtsepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovsdbmem128(mem_addr, a.as_i32x4(), k);
33996}
33997
33998/// Convert packed unsigned 32-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33999///
34000/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi32_storeu_epi8&expand=2071)
34001#[inline]
34002#[target_feature(enable = "avx512f")]
34003#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34004#[cfg_attr(test, assert_instr(vpmovusdb))]
34005pub unsafe fn _mm512_mask_cvtusepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
    vpmovusdbmem(mem_addr, a.as_i32x16(), k);
34007}
34008
34009/// Convert packed unsigned 32-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34010///
34011/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi32_storeu_epi8&expand=2070)
34012#[inline]
34013#[target_feature(enable = "avx512f,avx512vl")]
34014#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34015#[cfg_attr(test, assert_instr(vpmovusdb))]
34016pub unsafe fn _mm256_mask_cvtusepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovusdbmem256(mem_addr, a.as_i32x8(), k);
34018}
34019
34020/// Convert packed unsigned 32-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34021///
34022/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi32_storeu_epi8&expand=2069)
34023#[inline]
34024#[target_feature(enable = "avx512f,avx512vl")]
34025#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34026#[cfg_attr(test, assert_instr(vpmovusdb))]
34027pub unsafe fn _mm_mask_cvtusepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovusdbmem128(mem_addr, a.as_i32x4(), k);
34029}
34030
34031/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34032///
34033/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_storeu_epi16&expand=1513)
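///
/// # Example
///
/// An illustrative sketch (not compiled as a doctest here) showing that this store
/// truncates rather than saturates; it assumes AVX-512F is available:
///
/// ```ignore
/// let mut out = [0i16; 8];
/// unsafe {
///     // 0x1_0001 keeps only its low 16 bits (1) when truncated to a 16-bit lane.
///     let a = _mm512_set1_epi64(0x1_0001);
///     // All eight lanes are active, so 16 bytes are written.
///     _mm512_mask_cvtepi64_storeu_epi16(out.as_mut_ptr().cast(), 0xFF, a);
/// }
/// assert_eq!(out, [1i16; 8]);
/// ```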
34034#[inline]
34035#[target_feature(enable = "avx512f")]
34036#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34037#[cfg_attr(test, assert_instr(vpmovqw))]
34038pub unsafe fn _mm512_mask_cvtepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
    vpmovqwmem(mem_addr, a.as_i64x8(), k);
34040}
34041
34042/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34043///
34044/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_storeu_epi16&expand=1512)
34045#[inline]
34046#[target_feature(enable = "avx512f,avx512vl")]
34047#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34048#[cfg_attr(test, assert_instr(vpmovqw))]
34049pub unsafe fn _mm256_mask_cvtepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovqwmem256(mem_addr, a.as_i64x4(), k);
34051}
34052
34053/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34054///
34055/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_storeu_epi16&expand=1511)
34056#[inline]
34057#[target_feature(enable = "avx512f,avx512vl")]
34058#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34059#[cfg_attr(test, assert_instr(vpmovqw))]
34060pub unsafe fn _mm_mask_cvtepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovqwmem128(mem_addr, a.as_i64x2(), k);
34062}
34063
34064/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34065///
34066/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_storeu_epi16&expand=1866)
34067#[inline]
34068#[target_feature(enable = "avx512f")]
34069#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34070#[cfg_attr(test, assert_instr(vpmovsqw))]
34071pub unsafe fn _mm512_mask_cvtsepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
    vpmovsqwmem(mem_addr, a.as_i64x8(), k);
34073}
34074
34075/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34076///
34077/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_storeu_epi16&expand=1865)
34078#[inline]
34079#[target_feature(enable = "avx512f,avx512vl")]
34080#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34081#[cfg_attr(test, assert_instr(vpmovsqw))]
34082pub unsafe fn _mm256_mask_cvtsepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovsqwmem256(mem_addr, a.as_i64x4(), k);
34084}
34085
34086/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34087///
34088/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_storeu_epi16&expand=1864)
34089#[inline]
34090#[target_feature(enable = "avx512f,avx512vl")]
34091#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34092#[cfg_attr(test, assert_instr(vpmovsqw))]
34093pub unsafe fn _mm_mask_cvtsepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovsqwmem128(mem_addr, a.as_i64x2(), k);
34095}
34096
34097/// Convert packed unsigned 64-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34098///
34099/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_storeu_epi16&expand=2101)
34100#[inline]
34101#[target_feature(enable = "avx512f")]
34102#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34103#[cfg_attr(test, assert_instr(vpmovusqw))]
34104pub unsafe fn _mm512_mask_cvtusepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
    vpmovusqwmem(mem_addr, a.as_i64x8(), k);
34106}
34107
34108/// Convert packed unsigned 64-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34109///
34110/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_storeu_epi16&expand=2100)
34111#[inline]
34112#[target_feature(enable = "avx512f,avx512vl")]
34113#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34114#[cfg_attr(test, assert_instr(vpmovusqw))]
34115pub unsafe fn _mm256_mask_cvtusepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovusqwmem256(mem_addr, a.as_i64x4(), k);
34117}
34118
34119/// Convert packed unsigned 64-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34120///
34121/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_storeu_epi16&expand=2099)
34122#[inline]
34123#[target_feature(enable = "avx512f,avx512vl")]
34124#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34125#[cfg_attr(test, assert_instr(vpmovusqw))]
34126pub unsafe fn _mm_mask_cvtusepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovusqwmem128(mem_addr, a.as_i64x2(), k);
34128}
34129
34130/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34131///
34132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_storeu_epi8&expand=1519)
34133#[inline]
34134#[target_feature(enable = "avx512f")]
34135#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34136#[cfg_attr(test, assert_instr(vpmovqb))]
34137pub unsafe fn _mm512_mask_cvtepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
    vpmovqbmem(mem_addr, a.as_i64x8(), k);
34139}
34140
34141/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34142///
34143/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_storeu_epi8&expand=1518)
34144#[inline]
34145#[target_feature(enable = "avx512f,avx512vl")]
34146#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34147#[cfg_attr(test, assert_instr(vpmovqb))]
34148pub unsafe fn _mm256_mask_cvtepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovqbmem256(mem_addr, a.as_i64x4(), k);
34150}
34151
34152/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34153///
34154/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_storeu_epi8&expand=1517)
34155#[inline]
34156#[target_feature(enable = "avx512f,avx512vl")]
34157#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34158#[cfg_attr(test, assert_instr(vpmovqb))]
34159pub unsafe fn _mm_mask_cvtepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovqbmem128(mem_addr, a.as_i64x2(), k);
34161}
34162
34163/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34164///
34165/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_storeu_epi8&expand=1872)
34166#[inline]
34167#[target_feature(enable = "avx512f")]
34168#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34169#[cfg_attr(test, assert_instr(vpmovsqb))]
34170pub unsafe fn _mm512_mask_cvtsepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
    vpmovsqbmem(mem_addr, a.as_i64x8(), k);
34172}
34173
34174/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34175///
34176/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_storeu_epi8&expand=1871)
34177#[inline]
34178#[target_feature(enable = "avx512f,avx512vl")]
34179#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34180#[cfg_attr(test, assert_instr(vpmovsqb))]
34181pub unsafe fn _mm256_mask_cvtsepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovsqbmem256(mem_addr, a.as_i64x4(), k);
34183}
34184
34185/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34186///
34187/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_storeu_epi8&expand=1870)
34188#[inline]
34189#[target_feature(enable = "avx512f,avx512vl")]
34190#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34191#[cfg_attr(test, assert_instr(vpmovsqb))]
34192pub unsafe fn _mm_mask_cvtsepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovsqbmem128(mem_addr, a.as_i64x2(), k);
34194}
34195
34196/// Convert packed unsigned 64-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34197///
34198/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_storeu_epi8&expand=2107)
34199#[inline]
34200#[target_feature(enable = "avx512f")]
34201#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34202#[cfg_attr(test, assert_instr(vpmovusqb))]
34203pub unsafe fn _mm512_mask_cvtusepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
    vpmovusqbmem(mem_addr, a.as_i64x8(), k);
34205}
34206
34207/// Convert packed unsigned 64-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34208///
34209/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_storeu_epi8&expand=2106)
34210#[inline]
34211#[target_feature(enable = "avx512f,avx512vl")]
34212#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34213#[cfg_attr(test, assert_instr(vpmovusqb))]
34214pub unsafe fn _mm256_mask_cvtusepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovusqbmem256(mem_addr, a.as_i64x4(), k);
34216}
34217
34218/// Convert packed unsigned 64-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34219///
34220/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_storeu_epi8&expand=2105)
34221#[inline]
34222#[target_feature(enable = "avx512f,avx512vl")]
34223#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34224#[cfg_attr(test, assert_instr(vpmovusqb))]
34225pub unsafe fn _mm_mask_cvtusepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovusqbmem128(mem_addr, a.as_i64x2(), k);
34227}
34228
/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34230///
34231/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_storeu_epi32&expand=1516)
34232#[inline]
34233#[target_feature(enable = "avx512f")]
34234#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34235#[cfg_attr(test, assert_instr(vpmovqd))]
34236pub unsafe fn _mm512_mask_cvtepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
    vpmovqdmem(mem_addr, a.as_i64x8(), k);
34238}
34239
/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34241///
34242/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_storeu_epi32&expand=1515)
34243#[inline]
34244#[target_feature(enable = "avx512f,avx512vl")]
34245#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34246#[cfg_attr(test, assert_instr(vpmovqd))]
34247pub unsafe fn _mm256_mask_cvtepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovqdmem256(mem_addr, a.as_i64x4(), k);
34249}
34250
/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34252///
34253/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_storeu_epi32&expand=1514)
34254#[inline]
34255#[target_feature(enable = "avx512f,avx512vl")]
34256#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34257#[cfg_attr(test, assert_instr(vpmovqd))]
34258pub unsafe fn _mm_mask_cvtepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovqdmem128(mem_addr, a.as_i64x2(), k);
34260}
34261
34262/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34263///
34264/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_storeu_epi32&expand=1869)
34265#[inline]
34266#[target_feature(enable = "avx512f")]
34267#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34268#[cfg_attr(test, assert_instr(vpmovsqd))]
34269pub unsafe fn _mm512_mask_cvtsepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
    vpmovsqdmem(mem_addr, a.as_i64x8(), k);
34271}
34272
34273/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34274///
34275/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_storeu_epi32&expand=1868)
34276#[inline]
34277#[target_feature(enable = "avx512f,avx512vl")]
34278#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34279#[cfg_attr(test, assert_instr(vpmovsqd))]
34280pub unsafe fn _mm256_mask_cvtsepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovsqdmem256(mem_addr, a.as_i64x4(), k);
34282}
34283
34284/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34285///
34286/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_storeu_epi32&expand=1867)
34287#[inline]
34288#[target_feature(enable = "avx512f,avx512vl")]
34289#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34290#[cfg_attr(test, assert_instr(vpmovsqd))]
34291pub unsafe fn _mm_mask_cvtsepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovsqdmem128(mem_addr, a.as_i64x2(), k);
34293}
34294
34295/// Convert packed unsigned 64-bit integers in a to packed 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34296///
34297/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_storeu_epi32&expand=2104)
34298#[inline]
34299#[target_feature(enable = "avx512f")]
34300#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34301#[cfg_attr(test, assert_instr(vpmovusqd))]
34302pub unsafe fn _mm512_mask_cvtusepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
    vpmovusqdmem(mem_addr, a.as_i64x8(), k);
34304}
34305
34306/// Convert packed unsigned 64-bit integers in a to packed 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34307///
34308/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_storeu_epi32&expand=2103)
34309#[inline]
34310#[target_feature(enable = "avx512f,avx512vl")]
34311#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34312#[cfg_attr(test, assert_instr(vpmovusqd))]
34313pub unsafe fn _mm256_mask_cvtusepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovusqdmem256(mem_addr, a.as_i64x4(), k);
34315}
34316
34317/// Convert packed unsigned 64-bit integers in a to packed 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34318///
34319/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_storeu_epi32&expand=2102)
34320#[inline]
34321#[target_feature(enable = "avx512f,avx512vl")]
34322#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34323#[cfg_attr(test, assert_instr(vpmovusqd))]
34324pub unsafe fn _mm_mask_cvtusepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovusqdmem128(mem_addr, a.as_i64x2(), k);
34326}
34327
34328/// Store 512-bits (composed of 16 packed 32-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
34329///
34330/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_epi32&expand=5628)
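///
/// # Example
///
/// An illustrative sketch (not compiled as a doctest here); it assumes AVX-512F is
/// available. The destination only has to be valid for 64 bytes of writes; no
/// particular alignment is required:
///
/// ```ignore
/// let mut out = [0i32; 16];
/// unsafe {
///     let a = _mm512_set1_epi32(7);
///     _mm512_storeu_epi32(out.as_mut_ptr(), a);
/// }
/// assert_eq!(out, [7i32; 16]);
/// ```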
34331#[inline]
34332#[target_feature(enable = "avx512f")]
34333#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34334#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
34335pub unsafe fn _mm512_storeu_epi32(mem_addr: *mut i32, a: __m512i) {
    ptr::write_unaligned(mem_addr as *mut __m512i, a);
34337}
34338
34339/// Store 256-bits (composed of 8 packed 32-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
34340///
34341/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_epi32&expand=5626)
34342#[inline]
34343#[target_feature(enable = "avx512f,avx512vl")]
34344#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34345#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
34346pub unsafe fn _mm256_storeu_epi32(mem_addr: *mut i32, a: __m256i) {
    ptr::write_unaligned(mem_addr as *mut __m256i, a);
34348}
34349
34350/// Store 128-bits (composed of 4 packed 32-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
34351///
34352/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_epi32&expand=5624)
34353#[inline]
34354#[target_feature(enable = "avx512f,avx512vl")]
34355#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34356#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
34357pub unsafe fn _mm_storeu_epi32(mem_addr: *mut i32, a: __m128i) {
    ptr::write_unaligned(mem_addr as *mut __m128i, a);
34359}
34360
34361/// Load 512-bits (composed of 8 packed 64-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
34362///
34363/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_epi64&expand=3386)
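///
/// # Example
///
/// An illustrative sketch (not compiled as a doctest here); it assumes AVX-512F is
/// available and round-trips the loaded vector through an unaligned store:
///
/// ```ignore
/// let src: [i64; 8] = [0, 1, 2, 3, 4, 5, 6, 7];
/// let mut out = [0i64; 8];
/// unsafe {
///     // A plain slice pointer is fine: loadu has no alignment requirement.
///     let v = _mm512_loadu_epi64(src.as_ptr());
///     _mm512_storeu_epi64(out.as_mut_ptr(), v);
/// }
/// assert_eq!(out, src);
/// ```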
34364#[inline]
34365#[target_feature(enable = "avx512f")]
34366#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34367#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
34368pub unsafe fn _mm512_loadu_epi64(mem_addr: *const i64) -> __m512i {
    ptr::read_unaligned(mem_addr as *const __m512i)
34370}
34371
34372/// Load 256-bits (composed of 4 packed 64-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
34373///
34374/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_epi64&expand=3383)
34375#[inline]
34376#[target_feature(enable = "avx512f,avx512vl")]
34377#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34378#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
34379pub unsafe fn _mm256_loadu_epi64(mem_addr: *const i64) -> __m256i {
    ptr::read_unaligned(mem_addr as *const __m256i)
34381}
34382
34383/// Load 128-bits (composed of 2 packed 64-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
34384///
34385/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_epi64&expand=3380)
34386#[inline]
34387#[target_feature(enable = "avx512f,avx512vl")]
34388#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34389#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
34390pub unsafe fn _mm_loadu_epi64(mem_addr: *const i64) -> __m128i {
    ptr::read_unaligned(mem_addr as *const __m128i)
34392}
34393
34394/// Store 512-bits (composed of 8 packed 64-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
34395///
34396/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_epi64&expand=5634)
34397#[inline]
34398#[target_feature(enable = "avx512f")]
34399#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34400#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
34401pub unsafe fn _mm512_storeu_epi64(mem_addr: *mut i64, a: __m512i) {
    ptr::write_unaligned(mem_addr as *mut __m512i, a);
34403}
34404
34405/// Store 256-bits (composed of 4 packed 64-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
34406///
34407/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_epi64&expand=5632)
34408#[inline]
34409#[target_feature(enable = "avx512f,avx512vl")]
34410#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34411#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
34412pub unsafe fn _mm256_storeu_epi64(mem_addr: *mut i64, a: __m256i) {
    ptr::write_unaligned(mem_addr as *mut __m256i, a);
34414}
34415
34416/// Store 128-bits (composed of 2 packed 64-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
34417///
34418/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_epi64&expand=5630)
34419#[inline]
34420#[target_feature(enable = "avx512f,avx512vl")]
34421#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34422#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
34423pub unsafe fn _mm_storeu_epi64(mem_addr: *mut i64, a: __m128i) {
    ptr::write_unaligned(mem_addr as *mut __m128i, a);
34425}
34426
34427/// Load 512-bits of integer data from memory into dst. mem_addr does not need to be aligned on any particular boundary.
34428///
34429/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_si512&expand=3420)
34430#[inline]
34431#[target_feature(enable = "avx512f")]
34432#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34433#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
34434pub unsafe fn _mm512_loadu_si512(mem_addr: *const i32) -> __m512i {
    ptr::read_unaligned(mem_addr as *const __m512i)
34436}
34437
34438/// Store 512-bits of integer data from a into memory. mem_addr does not need to be aligned on any particular boundary.
34439///
34440/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_si512&expand=5657)
34441#[inline]
34442#[target_feature(enable = "avx512f")]
34443#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34444#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
34445pub unsafe fn _mm512_storeu_si512(mem_addr: *mut __m512i, a: __m512i) {
    ptr::write_unaligned(mem_addr, a);
34447}
34448
34449/// Loads 512-bits (composed of 8 packed double-precision (64-bit)
34450/// floating-point elements) from memory into result.
34451/// `mem_addr` does not need to be aligned on any particular boundary.
34452///
34453/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_pd)
34454#[inline]
34455#[target_feature(enable = "avx512f")]
34456#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34457#[cfg_attr(test, assert_instr(vmovups))]
34458pub unsafe fn _mm512_loadu_pd(mem_addr: *const f64) -> __m512d {
    ptr::read_unaligned(mem_addr as *const __m512d)
34460}
34461
34462/// Stores 512-bits (composed of 8 packed double-precision (64-bit)
34463/// floating-point elements) from `a` into memory.
34464/// `mem_addr` does not need to be aligned on any particular boundary.
34465///
34466/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_pd)
34467#[inline]
34468#[target_feature(enable = "avx512f")]
34469#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34470#[cfg_attr(test, assert_instr(vmovups))]
34471pub unsafe fn _mm512_storeu_pd(mem_addr: *mut f64, a: __m512d) {
    ptr::write_unaligned(mem_addr as *mut __m512d, a);
34473}
34474
34475/// Loads 512-bits (composed of 16 packed single-precision (32-bit)
34476/// floating-point elements) from memory into result.
34477/// `mem_addr` does not need to be aligned on any particular boundary.
34478///
34479/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_ps)
34480#[inline]
34481#[target_feature(enable = "avx512f")]
34482#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34483#[cfg_attr(test, assert_instr(vmovups))]
34484pub unsafe fn _mm512_loadu_ps(mem_addr: *const f32) -> __m512 {
    ptr::read_unaligned(mem_addr as *const __m512)
34486}
34487
34488/// Stores 512-bits (composed of 16 packed single-precision (32-bit)
34489/// floating-point elements) from `a` into memory.
34490/// `mem_addr` does not need to be aligned on any particular boundary.
34491///
34492/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_ps)
34493#[inline]
34494#[target_feature(enable = "avx512f")]
34495#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34496#[cfg_attr(test, assert_instr(vmovups))]
34497pub unsafe fn _mm512_storeu_ps(mem_addr: *mut f32, a: __m512) {
    ptr::write_unaligned(mem_addr as *mut __m512, a);
34499}
34500
34501/// Load 512-bits of integer data from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
34502///
34503/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_si512&expand=3345)
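///
/// # Example
///
/// An illustrative sketch (not compiled as a doctest here); it assumes AVX-512F is
/// available and uses a hypothetical `Aligned` wrapper only to satisfy the 64-byte
/// alignment requirement:
///
/// ```ignore
/// #[repr(align(64))]
/// struct Aligned([i32; 16]);
///
/// let data = Aligned([1; 16]);
/// let mut out = [0i32; 16];
/// unsafe {
///     // `data.0` starts at offset 0 of a 64-byte-aligned struct, so the load is valid.
///     let v = _mm512_load_si512(data.0.as_ptr());
///     _mm512_storeu_si512(out.as_mut_ptr().cast(), v);
/// }
/// assert_eq!(out, [1; 16]);
/// ```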
34504#[inline]
34505#[target_feature(enable = "avx512f")]
34506#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34507#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa32
34508pub unsafe fn _mm512_load_si512(mem_addr: *const i32) -> __m512i {
    ptr::read(mem_addr as *const __m512i)
34510}
34511
34512/// Store 512-bits of integer data from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
34513///
34514/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_si512&expand=5598)
34515#[inline]
34516#[target_feature(enable = "avx512f")]
34517#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34518#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa32
34519pub unsafe fn _mm512_store_si512(mem_addr: *mut __m512i, a: __m512i) {
    ptr::write(mem_addr, a);
34521}
34522
34523/// Load 512-bits (composed of 16 packed 32-bit integers) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
34524///
34525/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_epi32&expand=3304)
34526#[inline]
34527#[target_feature(enable = "avx512f")]
34528#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34529#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa32
34530pub unsafe fn _mm512_load_epi32(mem_addr: *const i32) -> __m512i {
    ptr::read(mem_addr as *const __m512i)
34532}
34533
34534/// Load 256-bits (composed of 8 packed 32-bit integers) from memory into dst. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
34535///
34536/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_load_epi32&expand=3301)
34537#[inline]
34538#[target_feature(enable = "avx512f,avx512vl")]
34539#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34540#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa32
34541pub unsafe fn _mm256_load_epi32(mem_addr: *const i32) -> __m256i {
    ptr::read(mem_addr as *const __m256i)
34543}
34544
34545/// Load 128-bits (composed of 4 packed 32-bit integers) from memory into dst. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
34546///
34547/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_epi32&expand=3298)
34548#[inline]
34549#[target_feature(enable = "avx512f,avx512vl")]
34550#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34551#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa32
34552pub unsafe fn _mm_load_epi32(mem_addr: *const i32) -> __m128i {
    ptr::read(mem_addr as *const __m128i)
34554}
34555
34556/// Store 512-bits (composed of 16 packed 32-bit integers) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
34557///
34558/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_epi32&expand=5569)
34559#[inline]
34560#[target_feature(enable = "avx512f")]
34561#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34562#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa32
34563pub unsafe fn _mm512_store_epi32(mem_addr: *mut i32, a: __m512i) {
    ptr::write(mem_addr as *mut __m512i, a);
34565}
34566
34567/// Store 256-bits (composed of 8 packed 32-bit integers) from a into memory. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
34568///
34569/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_store_epi32&expand=5567)
34570#[inline]
34571#[target_feature(enable = "avx512f,avx512vl")]
34572#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34573#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa32
34574pub unsafe fn _mm256_store_epi32(mem_addr: *mut i32, a: __m256i) {
    ptr::write(mem_addr as *mut __m256i, a);
34576}
34577
34578/// Store 128-bits (composed of 4 packed 32-bit integers) from a into memory. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
34579///
34580/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_epi32&expand=5565)
34581#[inline]
34582#[target_feature(enable = "avx512f,avx512vl")]
34583#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34584#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa32
34585pub unsafe fn _mm_store_epi32(mem_addr: *mut i32, a: __m128i) {
    ptr::write(mem_addr as *mut __m128i, a);
34587}
34588
34589/// Load 512-bits (composed of 8 packed 64-bit integers) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
34590///
34591/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_epi64&expand=3313)
34592#[inline]
34593#[target_feature(enable = "avx512f")]
34594#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34595#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa64
34596pub unsafe fn _mm512_load_epi64(mem_addr: *const i64) -> __m512i {
    ptr::read(mem_addr as *const __m512i)
34598}
34599
34600/// Load 256-bits (composed of 4 packed 64-bit integers) from memory into dst. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
34601///
34602/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_load_epi64&expand=3310)
34603#[inline]
34604#[target_feature(enable = "avx512f,avx512vl")]
34605#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34606#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa64
34607pub unsafe fn _mm256_load_epi64(mem_addr: *const i64) -> __m256i {
    ptr::read(mem_addr as *const __m256i)
34609}
34610
34611/// Load 128-bits (composed of 2 packed 64-bit integers) from memory into dst. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
34612///
34613/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_epi64&expand=3307)
34614#[inline]
34615#[target_feature(enable = "avx512f,avx512vl")]
34616#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34617#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa64
34618pub unsafe fn _mm_load_epi64(mem_addr: *const i64) -> __m128i {
    ptr::read(mem_addr as *const __m128i)
34620}
34621
34622/// Store 512-bits (composed of 8 packed 64-bit integers) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
34623///
34624/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_epi64&expand=5575)
34625#[inline]
34626#[target_feature(enable = "avx512f")]
34627#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34628#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa64
34629pub unsafe fn _mm512_store_epi64(mem_addr: *mut i64, a: __m512i) {
    ptr::write(mem_addr as *mut __m512i, a);
34631}
34632
34633/// Store 256-bits (composed of 4 packed 64-bit integers) from a into memory. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
34634///
34635/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_store_epi64&expand=5573)
34636#[inline]
34637#[target_feature(enable = "avx512f,avx512vl")]
34638#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34639#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa64
34640pub unsafe fn _mm256_store_epi64(mem_addr: *mut i64, a: __m256i) {
    ptr::write(mem_addr as *mut __m256i, a);
34642}
34643
34644/// Store 128-bits (composed of 2 packed 64-bit integers) from a into memory. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
34645///
34646/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_epi64&expand=5571)
34647#[inline]
34648#[target_feature(enable = "avx512f,avx512vl")]
34649#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34650#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa64
34651pub unsafe fn _mm_store_epi64(mem_addr: *mut i64, a: __m128i) {
    ptr::write(mem_addr as *mut __m128i, a);
34653}
34654
34655/// Load 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
34656///
34657/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_ps&expand=3336)
34658#[inline]
34659#[target_feature(enable = "avx512f")]
34660#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34661#[cfg_attr(test, assert_instr(vmovaps))]
34662pub unsafe fn _mm512_load_ps(mem_addr: *const f32) -> __m512 {
    ptr::read(mem_addr as *const __m512)
34664}
34665
/// Store 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
34667///
34668/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_ps&expand=5592)
34669#[inline]
34670#[target_feature(enable = "avx512f")]
34671#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34672#[cfg_attr(test, assert_instr(vmovaps))]
34673pub unsafe fn _mm512_store_ps(mem_addr: *mut f32, a: __m512) {
    ptr::write(mem_addr as *mut __m512, a);
34675}
34676
34677/// Load 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
34678///
34679/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_pd&expand=3326)
34680#[inline]
34681#[target_feature(enable = "avx512f")]
34682#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34683#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovapd
34684pub unsafe fn _mm512_load_pd(mem_addr: *const f64) -> __m512d {
    ptr::read(mem_addr as *const __m512d)
34686}
34687
34688/// Store 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
34689///
34690/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_pd&expand=5585)
34691#[inline]
34692#[target_feature(enable = "avx512f")]
34693#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34694#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovapd
34695pub unsafe fn _mm512_store_pd(mem_addr: *mut f64, a: __m512d) {
    ptr::write(mem_addr as *mut __m512d, a);
34697}
34698
34699/// Load packed 32-bit integers from memory into dst using writemask k
34700/// (elements are copied from src when the corresponding mask bit is not set).
34701/// mem_addr does not need to be aligned on any particular boundary.
34702///
34703/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_epi32)
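///
/// # Example
///
/// An illustrative sketch (not compiled as a doctest here) of merge-masking; it
/// assumes AVX-512F is available:
///
/// ```ignore
/// let src = [5i32; 16];
/// let mut out = [0i32; 16];
/// unsafe {
///     let fallback = _mm512_set1_epi32(-1);
///     // Lanes 0..8 are loaded from memory; lanes 8..16 are copied from `fallback`.
///     let v = _mm512_mask_loadu_epi32(fallback, 0x00FF, src.as_ptr());
///     _mm512_storeu_epi32(out.as_mut_ptr(), v);
/// }
/// assert_eq!(&out[..8], &[5; 8]);
/// assert_eq!(&out[8..], &[-1; 8]);
/// ```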
34704#[inline]
34705#[target_feature(enable = "avx512f")]
34706#[cfg_attr(test, assert_instr(vmovdqu32))]
34707#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34708pub unsafe fn _mm512_mask_loadu_epi32(src: __m512i, k: __mmask16, mem_addr: *const i32) -> __m512i {
    transmute(loaddqu32_512(mem_addr, src.as_i32x16(), k))
34710}
34711
34712/// Load packed 32-bit integers from memory into dst using zeromask k
34713/// (elements are zeroed out when the corresponding mask bit is not set).
34714/// mem_addr does not need to be aligned on any particular boundary.
34715///
34716/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_epi32)
34717#[inline]
34718#[target_feature(enable = "avx512f")]
34719#[cfg_attr(test, assert_instr(vmovdqu32))]
34720#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34721pub unsafe fn _mm512_maskz_loadu_epi32(k: __mmask16, mem_addr: *const i32) -> __m512i {
    _mm512_mask_loadu_epi32(_mm512_setzero_si512(), k, mem_addr)
34723}
34724
34725/// Load packed 64-bit integers from memory into dst using writemask k
34726/// (elements are copied from src when the corresponding mask bit is not set).
34727/// mem_addr does not need to be aligned on any particular boundary.
34728///
34729/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_epi64)
34730#[inline]
34731#[target_feature(enable = "avx512f")]
34732#[cfg_attr(test, assert_instr(vmovdqu64))]
34733#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34734pub unsafe fn _mm512_mask_loadu_epi64(src: __m512i, k: __mmask8, mem_addr: *const i64) -> __m512i {
    transmute(loaddqu64_512(mem_addr, src.as_i64x8(), k))
34736}
34737
34738/// Load packed 64-bit integers from memory into dst using zeromask k
34739/// (elements are zeroed out when the corresponding mask bit is not set).
34740/// mem_addr does not need to be aligned on any particular boundary.
34741///
34742/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_epi64)
34743#[inline]
34744#[target_feature(enable = "avx512f")]
34745#[cfg_attr(test, assert_instr(vmovdqu64))]
34746#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34747pub unsafe fn _mm512_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m512i {
    _mm512_mask_loadu_epi64(_mm512_setzero_si512(), k, mem_addr)
34749}
34750
34751/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
34752/// (elements are copied from src when the corresponding mask bit is not set).
34753/// mem_addr does not need to be aligned on any particular boundary.
34754///
34755/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_ps)
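///
/// # Example
///
/// An illustrative sketch (not compiled as a doctest here); it assumes AVX-512F is
/// available:
///
/// ```ignore
/// let src = [2.0f32; 16];
/// let mut out = [0.0f32; 16];
/// unsafe {
///     let fallback = _mm512_set1_ps(1.0);
///     // Even lanes come from memory, odd lanes keep the value from `fallback`.
///     let v = _mm512_mask_loadu_ps(fallback, 0b0101_0101_0101_0101, src.as_ptr());
///     _mm512_storeu_ps(out.as_mut_ptr(), v);
/// }
/// assert_eq!(out[0], 2.0);
/// assert_eq!(out[1], 1.0);
/// ```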
34756#[inline]
34757#[target_feature(enable = "avx512f")]
34758#[cfg_attr(test, assert_instr(vmovups))]
34759#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34760pub unsafe fn _mm512_mask_loadu_ps(src: __m512, k: __mmask16, mem_addr: *const f32) -> __m512 {
    transmute(loadups_512(mem_addr, src.as_f32x16(), k))
34762}
34763
34764/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
34765/// (elements are zeroed out when the corresponding mask bit is not set).
34766/// mem_addr does not need to be aligned on any particular boundary.
34767///
34768/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_ps)
34769#[inline]
34770#[target_feature(enable = "avx512f")]
34771#[cfg_attr(test, assert_instr(vmovups))]
34772#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34773pub unsafe fn _mm512_maskz_loadu_ps(k: __mmask16, mem_addr: *const f32) -> __m512 {
    _mm512_mask_loadu_ps(_mm512_setzero_ps(), k, mem_addr)
34775}
34776
34777/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
34778/// (elements are copied from src when the corresponding mask bit is not set).
34779/// mem_addr does not need to be aligned on any particular boundary.
34780///
34781/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_pd)
34782#[inline]
34783#[target_feature(enable = "avx512f")]
34784#[cfg_attr(test, assert_instr(vmovupd))]
34785#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34786pub unsafe fn _mm512_mask_loadu_pd(src: __m512d, k: __mmask8, mem_addr: *const f64) -> __m512d {
    transmute(loadupd_512(mem_addr, src.as_f64x8(), k))
34788}
34789
34790/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
34791/// (elements are zeroed out when the corresponding mask bit is not set).
34792/// mem_addr does not need to be aligned on any particular boundary.
34793///
34794/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_pd)
34795#[inline]
34796#[target_feature(enable = "avx512f")]
34797#[cfg_attr(test, assert_instr(vmovupd))]
34798#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34799pub unsafe fn _mm512_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m512d {
    _mm512_mask_loadu_pd(_mm512_setzero_pd(), k, mem_addr)
34801}
34802
34803/// Load packed 32-bit integers from memory into dst using writemask k
34804/// (elements are copied from src when the corresponding mask bit is not set).
34805/// mem_addr does not need to be aligned on any particular boundary.
34806///
34807/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_epi32)
34808#[inline]
34809#[target_feature(enable = "avx512f,avx512vl")]
34810#[cfg_attr(test, assert_instr(vmovdqu32))]
34811#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34812pub unsafe fn _mm256_mask_loadu_epi32(src: __m256i, k: __mmask8, mem_addr: *const i32) -> __m256i {
    transmute(loaddqu32_256(mem_addr, src.as_i32x8(), k))
34814}
34815
34816/// Load packed 32-bit integers from memory into dst using zeromask k
34817/// (elements are zeroed out when the corresponding mask bit is not set).
34818/// mem_addr does not need to be aligned on any particular boundary.
34819///
34820/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_epi32)
34821#[inline]
34822#[target_feature(enable = "avx512f,avx512vl")]
34823#[cfg_attr(test, assert_instr(vmovdqu32))]
34824#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34825pub unsafe fn _mm256_maskz_loadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m256i {
    _mm256_mask_loadu_epi32(_mm256_setzero_si256(), k, mem_addr)
34827}
34828
34829/// Load packed 64-bit integers from memory into dst using writemask k
34830/// (elements are copied from src when the corresponding mask bit is not set).
34831/// mem_addr does not need to be aligned on any particular boundary.
34832///
34833/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_epi64)
34834#[inline]
34835#[target_feature(enable = "avx512f,avx512vl")]
34836#[cfg_attr(test, assert_instr(vmovdqu64))]
34837#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34838pub unsafe fn _mm256_mask_loadu_epi64(src: __m256i, k: __mmask8, mem_addr: *const i64) -> __m256i {
    transmute(loaddqu64_256(mem_addr, src.as_i64x4(), k))
34840}
34841
34842/// Load packed 64-bit integers from memory into dst using zeromask k
34843/// (elements are zeroed out when the corresponding mask bit is not set).
34844/// mem_addr does not need to be aligned on any particular boundary.
34845///
34846/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_epi64)
34847#[inline]
34848#[target_feature(enable = "avx512f,avx512vl")]
34849#[cfg_attr(test, assert_instr(vmovdqu64))]
34850#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34851pub unsafe fn _mm256_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m256i {
    _mm256_mask_loadu_epi64(_mm256_setzero_si256(), k, mem_addr)
34853}
34854
34855/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
34856/// (elements are copied from src when the corresponding mask bit is not set).
34857/// mem_addr does not need to be aligned on any particular boundary.
34858///
34859/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_ps)
34860#[inline]
34861#[target_feature(enable = "avx512f,avx512vl")]
34862#[cfg_attr(test, assert_instr(vmovups))]
34863#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34864pub unsafe fn _mm256_mask_loadu_ps(src: __m256, k: __mmask8, mem_addr: *const f32) -> __m256 {
    transmute(loadups_256(mem_addr, src.as_f32x8(), k))
34866}
34867
34868/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
34869/// (elements are zeroed out when the corresponding mask bit is not set).
34870/// mem_addr does not need to be aligned on any particular boundary.
34871///
34872/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_ps)
34873#[inline]
34874#[target_feature(enable = "avx512f,avx512vl")]
34875#[cfg_attr(test, assert_instr(vmovups))]
34876#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34877pub unsafe fn _mm256_maskz_loadu_ps(k: __mmask8, mem_addr: *const f32) -> __m256 {
    _mm256_mask_loadu_ps(_mm256_setzero_ps(), k, mem_addr)
34879}
34880
34881/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
34882/// (elements are copied from src when the corresponding mask bit is not set).
34883/// mem_addr does not need to be aligned on any particular boundary.
34884///
34885/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_pd)
34886#[inline]
34887#[target_feature(enable = "avx512f,avx512vl")]
34888#[cfg_attr(test, assert_instr(vmovupd))]
34889#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34890pub unsafe fn _mm256_mask_loadu_pd(src: __m256d, k: __mmask8, mem_addr: *const f64) -> __m256d {
    transmute(loadupd_256(mem_addr, src.as_f64x4(), k))
34892}
34893
34894/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
34895/// (elements are zeroed out when the corresponding mask bit is not set).
34896/// mem_addr does not need to be aligned on any particular boundary.
34897///
34898/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_pd)
34899#[inline]
34900#[target_feature(enable = "avx512f,avx512vl")]
34901#[cfg_attr(test, assert_instr(vmovupd))]
34902#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34903pub unsafe fn _mm256_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m256d {
    _mm256_mask_loadu_pd(_mm256_setzero_pd(), k, mem_addr)
34905}
34906
34907/// Load packed 32-bit integers from memory into dst using writemask k
34908/// (elements are copied from src when the corresponding mask bit is not set).
34909/// mem_addr does not need to be aligned on any particular boundary.
34910///
34911/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_epi32)
34912#[inline]
34913#[target_feature(enable = "avx512f,avx512vl")]
34914#[cfg_attr(test, assert_instr(vmovdqu32))]
34915#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34916pub unsafe fn _mm_mask_loadu_epi32(src: __m128i, k: __mmask8, mem_addr: *const i32) -> __m128i {
    transmute(loaddqu32_128(mem_addr, src.as_i32x4(), k))
34918}
34919
34920/// Load packed 32-bit integers from memory into dst using zeromask k
34921/// (elements are zeroed out when the corresponding mask bit is not set).
34922/// mem_addr does not need to be aligned on any particular boundary.
34923///
34924/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_epi32)
34925#[inline]
34926#[target_feature(enable = "avx512f,avx512vl")]
34927#[cfg_attr(test, assert_instr(vmovdqu32))]
34928#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34929pub unsafe fn _mm_maskz_loadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m128i {
    _mm_mask_loadu_epi32(_mm_setzero_si128(), k, mem_addr)
34931}
34932
34933/// Load packed 64-bit integers from memory into dst using writemask k
34934/// (elements are copied from src when the corresponding mask bit is not set).
34935/// mem_addr does not need to be aligned on any particular boundary.
34936///
34937/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_epi64)
34938#[inline]
34939#[target_feature(enable = "avx512f,avx512vl")]
34940#[cfg_attr(test, assert_instr(vmovdqu64))]
34941#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34942pub unsafe fn _mm_mask_loadu_epi64(src: __m128i, k: __mmask8, mem_addr: *const i64) -> __m128i {
    transmute(loaddqu64_128(mem_addr, src.as_i64x2(), k))
34944}
34945
34946/// Load packed 64-bit integers from memory into dst using zeromask k
34947/// (elements are zeroed out when the corresponding mask bit is not set).
34948/// mem_addr does not need to be aligned on any particular boundary.
34949///
34950/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_epi64)
34951#[inline]
34952#[target_feature(enable = "avx512f,avx512vl")]
34953#[cfg_attr(test, assert_instr(vmovdqu64))]
34954#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34955pub unsafe fn _mm_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m128i {
    _mm_mask_loadu_epi64(_mm_setzero_si128(), k, mem_addr)
34957}
34958
34959/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
34960/// (elements are copied from src when the corresponding mask bit is not set).
34961/// mem_addr does not need to be aligned on any particular boundary.
34962///
34963/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_ps)
34964#[inline]
34965#[target_feature(enable = "avx512f,avx512vl")]
34966#[cfg_attr(test, assert_instr(vmovups))]
34967#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34968pub unsafe fn _mm_mask_loadu_ps(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
    transmute(loadups_128(mem_addr, src.as_f32x4(), k))
34970}
34971
34972/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
34973/// (elements are zeroed out when the corresponding mask bit is not set).
34974/// mem_addr does not need to be aligned on any particular boundary.
34975///
34976/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_ps)
34977#[inline]
34978#[target_feature(enable = "avx512f,avx512vl")]
34979#[cfg_attr(test, assert_instr(vmovups))]
34980#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34981pub unsafe fn _mm_maskz_loadu_ps(k: __mmask8, mem_addr: *const f32) -> __m128 {
    _mm_mask_loadu_ps(_mm_setzero_ps(), k, mem_addr)
34983}
34984
34985/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
34986/// (elements are copied from src when the corresponding mask bit is not set).
34987/// mem_addr does not need to be aligned on any particular boundary.
34988///
34989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_pd)
34990#[inline]
34991#[target_feature(enable = "avx512f,avx512vl")]
34992#[cfg_attr(test, assert_instr(vmovupd))]
34993#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34994pub unsafe fn _mm_mask_loadu_pd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
    transmute(loadupd_128(mem_addr, src.as_f64x2(), k))
34996}
34997
34998/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
34999/// (elements are zeroed out when the corresponding mask bit is not set).
35000/// mem_addr does not need to be aligned on any particular boundary.
35001///
35002/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_pd)
35003#[inline]
35004#[target_feature(enable = "avx512f,avx512vl")]
35005#[cfg_attr(test, assert_instr(vmovupd))]
35006#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35007pub unsafe fn _mm_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m128d {
    _mm_mask_loadu_pd(_mm_setzero_pd(), k, mem_addr)
35009}
35010
35011/// Load packed 32-bit integers from memory into dst using writemask k
35012/// (elements are copied from src when the corresponding mask bit is not set).
35013/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35014///
35015/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_epi32)
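///
/// # Example
///
/// A minimal usage sketch (not part of the original documentation). The wrapper type is
/// only there to guarantee the 64-byte alignment the instruction requires; the block
/// assumes the unstable `stdarch_x86_avx512` feature and runtime `avx512f` support, so
/// it is marked `ignore`:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[repr(align(64))]
/// struct Aligned([i32; 16]);
///
/// unsafe {
///     let data = Aligned([1i32; 16]);
///     let src = _mm512_setzero_si512();
///     // Load only the lower eight lanes; the upper eight stay zero.
///     let r = _mm512_mask_load_epi32(src, 0x00FF, data.0.as_ptr());
///     let out: [i32; 16] = core::mem::transmute(r);
///     assert_eq!(&out[..8], &[1; 8]);
///     assert_eq!(&out[8..], &[0; 8]);
/// }
/// ```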
35016#[inline]
35017#[target_feature(enable = "avx512f")]
35018#[cfg_attr(test, assert_instr(vmovdqa32))]
35019#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35020pub unsafe fn _mm512_mask_load_epi32(src: __m512i, k: __mmask16, mem_addr: *const i32) -> __m512i {
    transmute(loaddqa32_512(mem_addr, src.as_i32x16(), k))
35022}
35023
35024/// Load packed 32-bit integers from memory into dst using zeromask k
35025/// (elements are zeroed out when the corresponding mask bit is not set).
35026/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35027///
35028/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_epi32)
35029#[inline]
35030#[target_feature(enable = "avx512f")]
35031#[cfg_attr(test, assert_instr(vmovdqa32))]
35032#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35033pub unsafe fn _mm512_maskz_load_epi32(k: __mmask16, mem_addr: *const i32) -> __m512i {
    _mm512_mask_load_epi32(_mm512_setzero_si512(), k, mem_addr)
35035}
35036
35037/// Load packed 64-bit integers from memory into dst using writemask k
35038/// (elements are copied from src when the corresponding mask bit is not set).
35039/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35040///
35041/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_epi64)
35042#[inline]
35043#[target_feature(enable = "avx512f")]
35044#[cfg_attr(test, assert_instr(vmovdqa64))]
35045#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35046pub unsafe fn _mm512_mask_load_epi64(src: __m512i, k: __mmask8, mem_addr: *const i64) -> __m512i {
    transmute(loaddqa64_512(mem_addr, src.as_i64x8(), k))
35048}
35049
35050/// Load packed 64-bit integers from memory into dst using zeromask k
35051/// (elements are zeroed out when the corresponding mask bit is not set).
35052/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35053///
35054/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_epi64)
35055#[inline]
35056#[target_feature(enable = "avx512f")]
35057#[cfg_attr(test, assert_instr(vmovdqa64))]
35058#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35059pub unsafe fn _mm512_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m512i {
    _mm512_mask_load_epi64(_mm512_setzero_si512(), k, mem_addr)
35061}
35062
35063/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
35064/// (elements are copied from src when the corresponding mask bit is not set).
35065/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35066///
35067/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_ps)
35068#[inline]
35069#[target_feature(enable = "avx512f")]
35070#[cfg_attr(test, assert_instr(vmovaps))]
35071#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35072pub unsafe fn _mm512_mask_load_ps(src: __m512, k: __mmask16, mem_addr: *const f32) -> __m512 {
    transmute(loadaps_512(mem_addr, src.as_f32x16(), k))
35074}
35075
35076/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
35077/// (elements are zeroed out when the corresponding mask bit is not set).
35078/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35079///
35080/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_ps)
35081#[inline]
35082#[target_feature(enable = "avx512f")]
35083#[cfg_attr(test, assert_instr(vmovaps))]
35084#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35085pub unsafe fn _mm512_maskz_load_ps(k: __mmask16, mem_addr: *const f32) -> __m512 {
    _mm512_mask_load_ps(_mm512_setzero_ps(), k, mem_addr)
35087}
35088
35089/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
35090/// (elements are copied from src when the corresponding mask bit is not set).
35091/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35092///
35093/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_pd)
35094#[inline]
35095#[target_feature(enable = "avx512f")]
35096#[cfg_attr(test, assert_instr(vmovapd))]
35097#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35098pub unsafe fn _mm512_mask_load_pd(src: __m512d, k: __mmask8, mem_addr: *const f64) -> __m512d {
    transmute(loadapd_512(mem_addr, src.as_f64x8(), k))
35100}
35101
35102/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
35103/// (elements are zeroed out when the corresponding mask bit is not set).
35104/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35105///
35106/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_pd)
35107#[inline]
35108#[target_feature(enable = "avx512f")]
35109#[cfg_attr(test, assert_instr(vmovapd))]
35110#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35111pub unsafe fn _mm512_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m512d {
    _mm512_mask_load_pd(_mm512_setzero_pd(), k, mem_addr)
35113}
35114
35115/// Load packed 32-bit integers from memory into dst using writemask k
35116/// (elements are copied from src when the corresponding mask bit is not set).
35117/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35118///
35119/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_epi32)
35120#[inline]
35121#[target_feature(enable = "avx512f,avx512vl")]
35122#[cfg_attr(test, assert_instr(vmovdqa32))]
35123#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35124pub unsafe fn _mm256_mask_load_epi32(src: __m256i, k: __mmask8, mem_addr: *const i32) -> __m256i {
    transmute(loaddqa32_256(mem_addr, src.as_i32x8(), k))
35126}
35127
35128/// Load packed 32-bit integers from memory into dst using zeromask k
35129/// (elements are zeroed out when the corresponding mask bit is not set).
35130/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35131///
35132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_epi32)
35133#[inline]
35134#[target_feature(enable = "avx512f,avx512vl")]
35135#[cfg_attr(test, assert_instr(vmovdqa32))]
35136#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35137pub unsafe fn _mm256_maskz_load_epi32(k: __mmask8, mem_addr: *const i32) -> __m256i {
    _mm256_mask_load_epi32(_mm256_setzero_si256(), k, mem_addr)
35139}
35140
35141/// Load packed 64-bit integers from memory into dst using writemask k
35142/// (elements are copied from src when the corresponding mask bit is not set).
35143/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35144///
35145/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_epi64)
35146#[inline]
35147#[target_feature(enable = "avx512f,avx512vl")]
35148#[cfg_attr(test, assert_instr(vmovdqa64))]
35149#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35150pub unsafe fn _mm256_mask_load_epi64(src: __m256i, k: __mmask8, mem_addr: *const i64) -> __m256i {
    transmute(loaddqa64_256(mem_addr, src.as_i64x4(), k))
35152}
35153
35154/// Load packed 64-bit integers from memory into dst using zeromask k
35155/// (elements are zeroed out when the corresponding mask bit is not set).
35156/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35157///
35158/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_epi64)
35159#[inline]
35160#[target_feature(enable = "avx512f,avx512vl")]
35161#[cfg_attr(test, assert_instr(vmovdqa64))]
35162#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35163pub unsafe fn _mm256_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m256i {
    _mm256_mask_load_epi64(_mm256_setzero_si256(), k, mem_addr)
35165}
35166
35167/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
35168/// (elements are copied from src when the corresponding mask bit is not set).
35169/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35170///
35171/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_ps)
35172#[inline]
35173#[target_feature(enable = "avx512f,avx512vl")]
35174#[cfg_attr(test, assert_instr(vmovaps))]
35175#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35176pub unsafe fn _mm256_mask_load_ps(src: __m256, k: __mmask8, mem_addr: *const f32) -> __m256 {
    transmute(loadaps_256(mem_addr, src.as_f32x8(), k))
35178}
35179
35180/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
35181/// (elements are zeroed out when the corresponding mask bit is not set).
35182/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35183///
35184/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_ps)
35185#[inline]
35186#[target_feature(enable = "avx512f,avx512vl")]
35187#[cfg_attr(test, assert_instr(vmovaps))]
35188#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35189pub unsafe fn _mm256_maskz_load_ps(k: __mmask8, mem_addr: *const f32) -> __m256 {
    _mm256_mask_load_ps(_mm256_setzero_ps(), k, mem_addr)
35191}
35192
35193/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
35194/// (elements are copied from src when the corresponding mask bit is not set).
35195/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35196///
35197/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_pd)
35198#[inline]
35199#[target_feature(enable = "avx512f,avx512vl")]
35200#[cfg_attr(test, assert_instr(vmovapd))]
35201#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35202pub unsafe fn _mm256_mask_load_pd(src: __m256d, k: __mmask8, mem_addr: *const f64) -> __m256d {
    transmute(loadapd_256(mem_addr, src.as_f64x4(), k))
35204}
35205
35206/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
35207/// (elements are zeroed out when the corresponding mask bit is not set).
35208/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35209///
35210/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_pd)
35211#[inline]
35212#[target_feature(enable = "avx512f,avx512vl")]
35213#[cfg_attr(test, assert_instr(vmovapd))]
35214#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35215pub unsafe fn _mm256_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m256d {
    _mm256_mask_load_pd(_mm256_setzero_pd(), k, mem_addr)
35217}
35218
35219/// Load packed 32-bit integers from memory into dst using writemask k
35220/// (elements are copied from src when the corresponding mask bit is not set).
35221/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35222///
35223/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_epi32)
35224#[inline]
35225#[target_feature(enable = "avx512f,avx512vl")]
35226#[cfg_attr(test, assert_instr(vmovdqa32))]
35227#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35228pub unsafe fn _mm_mask_load_epi32(src: __m128i, k: __mmask8, mem_addr: *const i32) -> __m128i {
    transmute(loaddqa32_128(mem_addr, src.as_i32x4(), k))
35230}
35231
35232/// Load packed 32-bit integers from memory into dst using zeromask k
35233/// (elements are zeroed out when the corresponding mask bit is not set).
35234/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35235///
35236/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_epi32)
35237#[inline]
35238#[target_feature(enable = "avx512f,avx512vl")]
35239#[cfg_attr(test, assert_instr(vmovdqa32))]
35240#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35241pub unsafe fn _mm_maskz_load_epi32(k: __mmask8, mem_addr: *const i32) -> __m128i {
    _mm_mask_load_epi32(_mm_setzero_si128(), k, mem_addr)
35243}
35244
35245/// Load packed 64-bit integers from memory into dst using writemask k
35246/// (elements are copied from src when the corresponding mask bit is not set).
35247/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35248///
35249/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_epi64)
35250#[inline]
35251#[target_feature(enable = "avx512f,avx512vl")]
35252#[cfg_attr(test, assert_instr(vmovdqa64))]
35253#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35254pub unsafe fn _mm_mask_load_epi64(src: __m128i, k: __mmask8, mem_addr: *const i64) -> __m128i {
    transmute(loaddqa64_128(mem_addr, src.as_i64x2(), k))
35256}
35257
35258/// Load packed 64-bit integers from memory into dst using zeromask k
35259/// (elements are zeroed out when the corresponding mask bit is not set).
35260/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35261///
35262/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_epi64)
35263#[inline]
35264#[target_feature(enable = "avx512f,avx512vl")]
35265#[cfg_attr(test, assert_instr(vmovdqa64))]
35266#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35267pub unsafe fn _mm_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m128i {
    _mm_mask_load_epi64(_mm_setzero_si128(), k, mem_addr)
35269}
35270
35271/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
35272/// (elements are copied from src when the corresponding mask bit is not set).
35273/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35274///
35275/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_ps)
35276#[inline]
35277#[target_feature(enable = "avx512f,avx512vl")]
35278#[cfg_attr(test, assert_instr(vmovaps))]
35279#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35280pub unsafe fn _mm_mask_load_ps(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
    transmute(loadaps_128(mem_addr, src.as_f32x4(), k))
35282}
35283
35284/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
35285/// (elements are zeroed out when the corresponding mask bit is not set).
35286/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35287///
35288/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_ps)
35289#[inline]
35290#[target_feature(enable = "avx512f,avx512vl")]
35291#[cfg_attr(test, assert_instr(vmovaps))]
35292#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35293pub unsafe fn _mm_maskz_load_ps(k: __mmask8, mem_addr: *const f32) -> __m128 {
    _mm_mask_load_ps(_mm_setzero_ps(), k, mem_addr)
35295}
35296
35297/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
35298/// (elements are copied from src when the corresponding mask bit is not set).
35299/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35300///
35301/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_pd)
35302#[inline]
35303#[target_feature(enable = "avx512f,avx512vl")]
35304#[cfg_attr(test, assert_instr(vmovapd))]
35305#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35306pub unsafe fn _mm_mask_load_pd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
    transmute(loadapd_128(mem_addr, src.as_f64x2(), k))
35308}
35309
35310/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
35311/// (elements are zeroed out when the corresponding mask bit is not set).
35312/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35313///
35314/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_pd)
35315#[inline]
35316#[target_feature(enable = "avx512f,avx512vl")]
35317#[cfg_attr(test, assert_instr(vmovapd))]
35318#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35319pub unsafe fn _mm_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m128d {
    _mm_mask_load_pd(_mm_setzero_pd(), k, mem_addr)
35321}
35322
35323/// Load a single-precision (32-bit) floating-point element from memory into the lower element of dst
35324/// using writemask k (the element is copied from src when mask bit 0 is not set), and set the upper
35325/// 3 packed elements of dst to zero. mem_addr must be aligned on a 16-byte boundary or a general-protection
35326/// exception may be generated.
35327///
35328/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_load_ss)
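///
/// # Example
///
/// A minimal usage sketch (not part of the original documentation). The wrapper type
/// only guarantees the required 16-byte alignment; the block assumes the unstable
/// `stdarch_x86_avx512` feature and runtime `avx512f` support, so it is marked `ignore`:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[repr(align(16))]
/// struct Aligned(f32);
///
/// unsafe {
///     let value = Aligned(7.0);
///     let src = _mm_set1_ps(1.0);
///     // Mask bit 0 is set, so the lower lane is loaded from memory;
///     // the upper three lanes of the result are zeroed.
///     let r = _mm_mask_load_ss(src, 1, &value.0);
///     let out: [f32; 4] = core::mem::transmute(r);
///     assert_eq!(out, [7.0, 0.0, 0.0, 0.0]);
/// }
/// ```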
35329#[inline]
35330#[cfg_attr(test, assert_instr(vmovss))]
35331#[target_feature(enable = "avx512f")]
35332#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35333pub unsafe fn _mm_mask_load_ss(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
35334 let mut dst: __m128 = src;
35335 asm!(
35336 vpl!("vmovss {dst}{{{k}}}"),
35337 p = in(reg) mem_addr,
35338 k = in(kreg) k,
35339 dst = inout(xmm_reg) dst,
35340 options(pure, readonly, nostack, preserves_flags),
35341 );
35342 dst
35343}
35344
35345/// Load a single-precision (32-bit) floating-point element from memory into the lower element of dst
35346/// using zeromask k (the element is zeroed out when mask bit 0 is not set), and set the upper 3 packed
35347/// elements of dst to zero. mem_addr must be aligned on a 16-byte boundary or a general-protection
35348/// exception may be generated.
35349///
35350/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_load_ss)
35351#[inline]
35352#[cfg_attr(test, assert_instr(vmovss))]
35353#[target_feature(enable = "avx512f")]
35354#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35355pub unsafe fn _mm_maskz_load_ss(k: __mmask8, mem_addr: *const f32) -> __m128 {
35356 let mut dst: __m128;
35357 asm!(
35358 vpl!("vmovss {dst}{{{k}}} {{z}}"),
35359 p = in(reg) mem_addr,
35360 k = in(kreg) k,
35361 dst = out(xmm_reg) dst,
35362 options(pure, readonly, nostack, preserves_flags),
35363 );
35364 dst
35365}
35366
35367/// Load a double-precision (64-bit) floating-point element from memory into the lower element of dst
35368/// using writemask k (the element is copied from src when mask bit 0 is not set), and set the upper
35369/// element of dst to zero. mem_addr must be aligned on a 16-byte boundary or a general-protection
35370/// exception may be generated.
35371///
35372/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_load_sd)
35373#[inline]
35374#[cfg_attr(test, assert_instr(vmovsd))]
35375#[target_feature(enable = "avx512f")]
35376#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35377pub unsafe fn _mm_mask_load_sd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
35378 let mut dst: __m128d = src;
35379 asm!(
35380 vpl!("vmovsd {dst}{{{k}}}"),
35381 p = in(reg) mem_addr,
35382 k = in(kreg) k,
35383 dst = inout(xmm_reg) dst,
35384 options(pure, readonly, nostack, preserves_flags),
35385 );
35386 dst
35387}
35388
35389/// Load a double-precision (64-bit) floating-point element from memory into the lower element of dst
35390/// using zeromask k (the element is zeroed out when mask bit 0 is not set), and set the upper element
35391/// of dst to zero. mem_addr must be aligned on a 16-byte boundary or a general-protection exception
35392/// may be generated.
35393///
35394/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_load_sd)
35395#[inline]
35396#[cfg_attr(test, assert_instr(vmovsd))]
35397#[target_feature(enable = "avx512f")]
35398#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35399pub unsafe fn _mm_maskz_load_sd(k: __mmask8, mem_addr: *const f64) -> __m128d {
35400 let mut dst: __m128d;
35401 asm!(
35402 vpl!("vmovsd {dst}{{{k}}} {{z}}"),
35403 p = in(reg) mem_addr,
35404 k = in(kreg) k,
35405 dst = out(xmm_reg) dst,
35406 options(pure, readonly, nostack, preserves_flags),
35407 );
35408 dst
35409}
35410
35411/// Store packed 32-bit integers from a into memory using writemask k.
35412/// mem_addr does not need to be aligned on any particular boundary.
35413///
35414/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_epi32)
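///
/// # Example
///
/// A minimal usage sketch (not part of the original documentation); it assumes the
/// unstable `stdarch_x86_avx512` feature and runtime `avx512f` support, so the block is
/// marked `ignore`:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let mut out = [0i32; 16];
///     let a = _mm512_set1_epi32(5);
///     // Only the lanes selected by the mask are written; the rest of the
///     // buffer is left untouched.
///     _mm512_mask_storeu_epi32(out.as_mut_ptr(), 0x0003, a);
///     assert_eq!(&out[..2], &[5, 5]);
///     assert_eq!(&out[2..], &[0; 14]);
/// }
/// ```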
35415#[inline]
35416#[target_feature(enable = "avx512f")]
35417#[cfg_attr(test, assert_instr(vmovdqu32))]
35418#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35419pub unsafe fn _mm512_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask16, a: __m512i) {
35420 storedqu32_512(mem_addr, a.as_i32x16(), mask)
35421}
35422
35423/// Store packed 64-bit integers from a into memory using writemask k.
35424/// mem_addr does not need to be aligned on any particular boundary.
35425///
35426/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_epi64)
35427#[inline]
35428#[target_feature(enable = "avx512f")]
35429#[cfg_attr(test, assert_instr(vmovdqu64))]
35430#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35431pub unsafe fn _mm512_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m512i) {
35432 storedqu64_512(mem_addr, a.as_i64x8(), mask)
35433}
35434
35435/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
35436/// mem_addr does not need to be aligned on any particular boundary.
35437///
35438/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_ps)
35439#[inline]
35440#[target_feature(enable = "avx512f")]
35441#[cfg_attr(test, assert_instr(vmovups))]
35442#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35443pub unsafe fn _mm512_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask16, a: __m512) {
35444 storeups_512(mem_addr, a.as_f32x16(), mask)
35445}
35446
35447/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
35448/// mem_addr does not need to be aligned on any particular boundary.
35449///
35450/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_pd)
35451#[inline]
35452#[target_feature(enable = "avx512f")]
35453#[cfg_attr(test, assert_instr(vmovupd))]
35454#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35455pub unsafe fn _mm512_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m512d) {
35456 storeupd_512(mem_addr, a.as_f64x8(), mask)
35457}
35458
35459/// Store packed 32-bit integers from a into memory using writemask k.
35460/// mem_addr does not need to be aligned on any particular boundary.
35461///
35462/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_epi32)
35463#[inline]
35464#[target_feature(enable = "avx512f,avx512vl")]
35465#[cfg_attr(test, assert_instr(vmovdqu32))]
35466#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35467pub unsafe fn _mm256_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m256i) {
35468 storedqu32_256(mem_addr, a.as_i32x8(), mask)
35469}
35470
35471/// Store packed 64-bit integers from a into memory using writemask k.
35472/// mem_addr does not need to be aligned on any particular boundary.
35473///
35474/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_epi64)
35475#[inline]
35476#[target_feature(enable = "avx512f,avx512vl")]
35477#[cfg_attr(test, assert_instr(vmovdqu64))]
35478#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35479pub unsafe fn _mm256_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m256i) {
35480 storedqu64_256(mem_addr, a.as_i64x4(), mask)
35481}
35482
35483/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
35484/// mem_addr does not need to be aligned on any particular boundary.
35485///
35486/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_ps)
35487#[inline]
35488#[target_feature(enable = "avx512f,avx512vl")]
35489#[cfg_attr(test, assert_instr(vmovups))]
35490#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35491pub unsafe fn _mm256_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask8, a: __m256) {
35492 storeups_256(mem_addr, a.as_f32x8(), mask)
35493}
35494
35495/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
35496/// mem_addr does not need to be aligned on any particular boundary.
35497///
35498/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_pd)
35499#[inline]
35500#[target_feature(enable = "avx512f,avx512vl")]
35501#[cfg_attr(test, assert_instr(vmovupd))]
35502#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35503pub unsafe fn _mm256_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m256d) {
35504 storeupd_256(mem_addr, a.as_f64x4(), mask)
35505}
35506
35507/// Store packed 32-bit integers from a into memory using writemask k.
35508/// mem_addr does not need to be aligned on any particular boundary.
35509///
35510/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_epi32)
35511#[inline]
35512#[target_feature(enable = "avx512f,avx512vl")]
35513#[cfg_attr(test, assert_instr(vmovdqu32))]
35514#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35515pub unsafe fn _mm_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m128i) {
35516 storedqu32_128(mem_addr, a.as_i32x4(), mask)
35517}
35518
35519/// Store packed 64-bit integers from a into memory using writemask k.
35520/// mem_addr does not need to be aligned on any particular boundary.
35521///
35522/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_epi64)
35523#[inline]
35524#[target_feature(enable = "avx512f,avx512vl")]
35525#[cfg_attr(test, assert_instr(vmovdqu64))]
35526#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35527pub unsafe fn _mm_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m128i) {
35528 storedqu64_128(mem_addr, a.as_i64x2(), mask)
35529}
35530
35531/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
35532/// mem_addr does not need to be aligned on any particular boundary.
35533///
35534/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_ps)
35535#[inline]
35536#[target_feature(enable = "avx512f,avx512vl")]
35537#[cfg_attr(test, assert_instr(vmovups))]
35538#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35539pub unsafe fn _mm_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask8, a: __m128) {
35540 storeups_128(mem_addr, a.as_f32x4(), mask)
35541}
35542
35543/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
35544/// mem_addr does not need to be aligned on any particular boundary.
35545///
35546/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_pd)
35547#[inline]
35548#[target_feature(enable = "avx512f,avx512vl")]
35549#[cfg_attr(test, assert_instr(vmovupd))]
35550#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35551pub unsafe fn _mm_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m128d) {
35552 storeupd_128(mem_addr, a.as_f64x2(), mask)
35553}
35554
35555/// Store packed 32-bit integers from a into memory using writemask k.
35556/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35557///
35558/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_epi32)
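///
/// # Example
///
/// A minimal usage sketch (not part of the original documentation). The wrapper type
/// only guarantees the 64-byte alignment the instruction requires; the block assumes
/// the unstable `stdarch_x86_avx512` feature and runtime `avx512f` support, so it is
/// marked `ignore`:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[repr(align(64))]
/// struct Aligned([i32; 16]);
///
/// unsafe {
///     let mut out = Aligned([0i32; 16]);
///     let a = _mm512_set1_epi32(9);
///     // Write only the even lanes; odd lanes keep their previous contents.
///     _mm512_mask_store_epi32(out.0.as_mut_ptr(), 0x5555, a);
///     assert_eq!(out.0[0], 9);
///     assert_eq!(out.0[1], 0);
/// }
/// ```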
35559#[inline]
35560#[target_feature(enable = "avx512f")]
35561#[cfg_attr(test, assert_instr(vmovdqa32))]
35562#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35563pub unsafe fn _mm512_mask_store_epi32(mem_addr: *mut i32, mask: __mmask16, a: __m512i) {
35564 storedqa32_512(mem_addr, a.as_i32x16(), mask)
35565}
35566
35567/// Store packed 64-bit integers from a into memory using writemask k.
35568/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35569///
35570/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_epi64)
35571#[inline]
35572#[target_feature(enable = "avx512f")]
35573#[cfg_attr(test, assert_instr(vmovdqa64))]
35574#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35575pub unsafe fn _mm512_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m512i) {
35576 storedqa64_512(mem_addr, a.as_i64x8(), mask)
35577}
35578
35579/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
35580/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35581///
35582/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_ps)
35583#[inline]
35584#[target_feature(enable = "avx512f")]
35585#[cfg_attr(test, assert_instr(vmovaps))]
35586#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35587pub unsafe fn _mm512_mask_store_ps(mem_addr: *mut f32, mask: __mmask16, a: __m512) {
35588 storeaps_512(mem_addr, a.as_f32x16(), mask)
35589}
35590
35591/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
35592/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35593///
35594/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_pd)
35595#[inline]
35596#[target_feature(enable = "avx512f")]
35597#[cfg_attr(test, assert_instr(vmovapd))]
35598#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35599pub unsafe fn _mm512_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m512d) {
35600 storeapd_512(mem_addr, a.as_f64x8(), mask)
35601}
35602
35603/// Store packed 32-bit integers from a into memory using writemask k.
35604/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35605///
35606/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_epi32)
35607#[inline]
35608#[target_feature(enable = "avx512f,avx512vl")]
35609#[cfg_attr(test, assert_instr(vmovdqa32))]
35610#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35611pub unsafe fn _mm256_mask_store_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m256i) {
35612 storedqa32_256(mem_addr, a.as_i32x8(), mask)
35613}
35614
35615/// Store packed 64-bit integers from a into memory using writemask k.
35616/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35617///
35618/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_epi64)
35619#[inline]
35620#[target_feature(enable = "avx512f,avx512vl")]
35621#[cfg_attr(test, assert_instr(vmovdqa64))]
35622#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35623pub unsafe fn _mm256_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m256i) {
35624 storedqa64_256(mem_addr, a.as_i64x4(), mask)
35625}
35626
35627/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
35628/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35629///
35630/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_ps)
35631#[inline]
35632#[target_feature(enable = "avx512f,avx512vl")]
35633#[cfg_attr(test, assert_instr(vmovaps))]
35634#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35635pub unsafe fn _mm256_mask_store_ps(mem_addr: *mut f32, mask: __mmask8, a: __m256) {
35636 storeaps_256(mem_addr, a.as_f32x8(), mask)
35637}
35638
35639/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
35640/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35641///
35642/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_pd)
35643#[inline]
35644#[target_feature(enable = "avx512f,avx512vl")]
35645#[cfg_attr(test, assert_instr(vmovapd))]
35646#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35647pub unsafe fn _mm256_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m256d) {
35648 storeapd_256(mem_addr, a.as_f64x4(), mask)
35649}
35650
35651/// Store packed 32-bit integers from a into memory using writemask k.
35652/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35653///
35654/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_epi32)
35655#[inline]
35656#[target_feature(enable = "avx512f,avx512vl")]
35657#[cfg_attr(test, assert_instr(vmovdqa32))]
35658#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35659pub unsafe fn _mm_mask_store_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m128i) {
35660 storedqa32_128(mem_addr, a.as_i32x4(), mask)
35661}
35662
35663/// Store packed 64-bit integers from a into memory using writemask k.
35664/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35665///
35666/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_epi64)
35667#[inline]
35668#[target_feature(enable = "avx512f,avx512vl")]
35669#[cfg_attr(test, assert_instr(vmovdqa64))]
35670#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35671pub unsafe fn _mm_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m128i) {
35672 storedqa64_128(mem_addr, a.as_i64x2(), mask)
35673}
35674
35675/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
35676/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35677///
35678/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_ps)
35679#[inline]
35680#[target_feature(enable = "avx512f,avx512vl")]
35681#[cfg_attr(test, assert_instr(vmovaps))]
35682#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35683pub unsafe fn _mm_mask_store_ps(mem_addr: *mut f32, mask: __mmask8, a: __m128) {
35684 storeaps_128(mem_addr, a.as_f32x4(), mask)
35685}
35686
35687/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
35688/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35689///
35690/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_pd)
35691#[inline]
35692#[target_feature(enable = "avx512f,avx512vl")]
35693#[cfg_attr(test, assert_instr(vmovapd))]
35694#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35695pub unsafe fn _mm_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m128d) {
35696 storeapd_128(mem_addr, a.as_f64x2(), mask)
35697}
35698
35699/// Store a single-precision (32-bit) floating-point element from a into memory using writemask k. mem_addr
35700/// must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35701///
35702/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_store_ss)
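///
/// # Example
///
/// A minimal usage sketch (not part of the original documentation). The wrapper type
/// only guarantees the required 16-byte alignment; the block assumes the unstable
/// `stdarch_x86_avx512` feature and runtime `avx512f` support, so it is marked `ignore`:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[repr(align(16))]
/// struct Aligned(f32);
///
/// unsafe {
///     let mut out = Aligned(0.0);
///     let a = _mm_set1_ps(3.5);
///     // Mask bit 0 is set, so the lower lane of `a` is written to memory;
///     // with a mask of 0 the store would be suppressed entirely.
///     _mm_mask_store_ss(&mut out.0, 1, a);
///     assert_eq!(out.0, 3.5);
/// }
/// ```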
35703#[inline]
35704#[cfg_attr(test, assert_instr(vmovss))]
35705#[target_feature(enable = "avx512f")]
35706#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35707pub unsafe fn _mm_mask_store_ss(mem_addr: *mut f32, k: __mmask8, a: __m128) {
35708 asm!(
35709 vps!("vmovss", "{{{k}}}, {a}"),
35710 p = in(reg) mem_addr,
35711 k = in(kreg) k,
35712 a = in(xmm_reg) a,
35713 options(nostack, preserves_flags),
35714 );
35715}
35716
35717/// Store a double-precision (64-bit) floating-point element from a into memory using writemask k. mem_addr
35718/// must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35719///
35720/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_store_sd)
35721#[inline]
35722#[cfg_attr(test, assert_instr(vmovsd))]
35723#[target_feature(enable = "avx512f")]
35724#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35725pub unsafe fn _mm_mask_store_sd(mem_addr: *mut f64, k: __mmask8, a: __m128d) {
35726 asm!(
35727 vps!("vmovsd", "{{{k}}}, {a}"),
35728 p = in(reg) mem_addr,
35729 k = in(kreg) k,
35730 a = in(xmm_reg) a,
35731 options(nostack, preserves_flags),
35732 );
35733}
35734
35735/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35736///
35737/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_epi32)
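///
/// # Example
///
/// A minimal usage sketch (not part of the original documentation) showing how the
/// expand load reads *contiguous* memory and scatters it into the lanes selected by
/// the mask; it assumes the unstable `stdarch_x86_avx512` feature and runtime
/// `avx512f` support, so the block is marked `ignore`:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let data = [100i32, 200, 300];
///     let src = _mm512_set1_epi32(-1);
///     // Three mask bits are set (lanes 1, 4 and 7), so exactly three consecutive
///     // values are read from `data` and placed in those lanes; every other lane
///     // is copied from `src`.
///     let r = _mm512_mask_expandloadu_epi32(src, 0b1001_0010, data.as_ptr());
///     let out: [i32; 16] = core::mem::transmute(r);
///     assert_eq!(out[1], 100);
///     assert_eq!(out[4], 200);
///     assert_eq!(out[7], 300);
///     assert_eq!(out[0], -1);
/// }
/// ```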
35738#[inline]
35739#[target_feature(enable = "avx512f")]
35740#[cfg_attr(test, assert_instr(vpexpandd))]
35741#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35742pub unsafe fn _mm512_mask_expandloadu_epi32(
35743 src: __m512i,
35744 k: __mmask16,
35745 mem_addr: *const i32,
35746) -> __m512i {
    transmute(expandloadd_512(mem_addr, src.as_i32x16(), k))
35748}
35749
35750/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35751///
35752/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_epi32)
35753#[inline]
35754#[target_feature(enable = "avx512f")]
35755#[cfg_attr(test, assert_instr(vpexpandd))]
35756#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35757pub unsafe fn _mm512_maskz_expandloadu_epi32(k: __mmask16, mem_addr: *const i32) -> __m512i {
    _mm512_mask_expandloadu_epi32(_mm512_setzero_si512(), k, mem_addr)
35759}
35760
35761/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35762///
35763/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_epi32)
35764#[inline]
35765#[target_feature(enable = "avx512f,avx512vl")]
35766#[cfg_attr(test, assert_instr(vpexpandd))]
35767#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35768pub unsafe fn _mm256_mask_expandloadu_epi32(
35769 src: __m256i,
35770 k: __mmask8,
35771 mem_addr: *const i32,
35772) -> __m256i {
    transmute(expandloadd_256(mem_addr, src.as_i32x8(), k))
35774}
35775
35776/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35777///
35778/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_epi32)
35779#[inline]
35780#[target_feature(enable = "avx512f,avx512vl")]
35781#[cfg_attr(test, assert_instr(vpexpandd))]
35782#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35783pub unsafe fn _mm256_maskz_expandloadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m256i {
    _mm256_mask_expandloadu_epi32(_mm256_setzero_si256(), k, mem_addr)
35785}
35786
35787/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35788///
35789/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_epi32)
35790#[inline]
35791#[target_feature(enable = "avx512f,avx512vl")]
35792#[cfg_attr(test, assert_instr(vpexpandd))]
35793#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35794pub unsafe fn _mm_mask_expandloadu_epi32(
35795 src: __m128i,
35796 k: __mmask8,
35797 mem_addr: *const i32,
35798) -> __m128i {
    transmute(expandloadd_128(mem_addr, src.as_i32x4(), k))
35800}
35801
35802/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35803///
35804/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_epi32)
35805#[inline]
35806#[target_feature(enable = "avx512f,avx512vl")]
35807#[cfg_attr(test, assert_instr(vpexpandd))]
35808#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35809pub unsafe fn _mm_maskz_expandloadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m128i {
    _mm_mask_expandloadu_epi32(_mm_setzero_si128(), k, mem_addr)
35811}
35812
35813/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35814///
35815/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_epi64)
35816#[inline]
35817#[target_feature(enable = "avx512f")]
35818#[cfg_attr(test, assert_instr(vpexpandq))]
35819#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35820pub unsafe fn _mm512_mask_expandloadu_epi64(
35821 src: __m512i,
35822 k: __mmask8,
35823 mem_addr: *const i64,
35824) -> __m512i {
    transmute(expandloadq_512(mem_addr, src.as_i64x8(), k))
35826}
35827
35828/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35829///
35830/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_epi64)
35831#[inline]
35832#[target_feature(enable = "avx512f")]
35833#[cfg_attr(test, assert_instr(vpexpandq))]
35834#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35835pub unsafe fn _mm512_maskz_expandloadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m512i {
    _mm512_mask_expandloadu_epi64(_mm512_setzero_si512(), k, mem_addr)
35837}
35838
35839/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35840///
35841/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_epi64)
35842#[inline]
35843#[target_feature(enable = "avx512f,avx512vl")]
35844#[cfg_attr(test, assert_instr(vpexpandq))]
35845#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35846pub unsafe fn _mm256_mask_expandloadu_epi64(
35847 src: __m256i,
35848 k: __mmask8,
35849 mem_addr: *const i64,
35850) -> __m256i {
    transmute(expandloadq_256(mem_addr, src.as_i64x4(), k))
35852}
35853
35854/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35855///
35856/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_epi64)
35857#[inline]
35858#[target_feature(enable = "avx512f,avx512vl")]
35859#[cfg_attr(test, assert_instr(vpexpandq))]
35860#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35861pub unsafe fn _mm256_maskz_expandloadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m256i {
    _mm256_mask_expandloadu_epi64(_mm256_setzero_si256(), k, mem_addr)
35863}
35864
35865/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35866///
35867/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_epi64)
35868#[inline]
35869#[target_feature(enable = "avx512f,avx512vl")]
35870#[cfg_attr(test, assert_instr(vpexpandq))]
35871#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35872pub unsafe fn _mm_mask_expandloadu_epi64(
35873 src: __m128i,
35874 k: __mmask8,
35875 mem_addr: *const i64,
35876) -> __m128i {
    transmute(expandloadq_128(mem_addr, src.as_i64x2(), k))
35878}
35879
35880/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35881///
35882/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_epi64)
35883#[inline]
35884#[target_feature(enable = "avx512f,avx512vl")]
35885#[cfg_attr(test, assert_instr(vpexpandq))]
35886#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35887pub unsafe fn _mm_maskz_expandloadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m128i {
    _mm_mask_expandloadu_epi64(_mm_setzero_si128(), k, mem_addr)
35889}
35890
35891/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35892///
35893/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_ps)
35894#[inline]
35895#[target_feature(enable = "avx512f")]
35896#[cfg_attr(test, assert_instr(vexpandps))]
35897#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35898pub unsafe fn _mm512_mask_expandloadu_ps(
35899 src: __m512,
35900 k: __mmask16,
35901 mem_addr: *const f32,
35902) -> __m512 {
    transmute(expandloadps_512(mem_addr, src.as_f32x16(), k))
35904}
35905
35906/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35907///
35908/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_ps)
35909#[inline]
35910#[target_feature(enable = "avx512f")]
35911#[cfg_attr(test, assert_instr(vexpandps))]
35912#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35913pub unsafe fn _mm512_maskz_expandloadu_ps(k: __mmask16, mem_addr: *const f32) -> __m512 {
    _mm512_mask_expandloadu_ps(_mm512_setzero_ps(), k, mem_addr)
35915}
35916
35917/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35918///
35919/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_ps)
35920#[inline]
35921#[target_feature(enable = "avx512f,avx512vl")]
35922#[cfg_attr(test, assert_instr(vexpandps))]
35923#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35924pub unsafe fn _mm256_mask_expandloadu_ps(src: __m256, k: __mmask8, mem_addr: *const f32) -> __m256 {
    transmute(expandloadps_256(mem_addr, src.as_f32x8(), k))
35926}
35927
35928/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35929///
35930/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_ps)
35931#[inline]
35932#[target_feature(enable = "avx512f,avx512vl")]
35933#[cfg_attr(test, assert_instr(vexpandps))]
35934#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35935pub unsafe fn _mm256_maskz_expandloadu_ps(k: __mmask8, mem_addr: *const f32) -> __m256 {
    _mm256_mask_expandloadu_ps(_mm256_setzero_ps(), k, mem_addr)
35937}
35938
35939/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35940///
35941/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_ps)
35942#[inline]
35943#[target_feature(enable = "avx512f,avx512vl")]
35944#[cfg_attr(test, assert_instr(vexpandps))]
35945#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35946pub unsafe fn _mm_mask_expandloadu_ps(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
    transmute(expandloadps_128(mem_addr, src.as_f32x4(), k))
35948}
35949
35950/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35951///
35952/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_ps)
35953#[inline]
35954#[target_feature(enable = "avx512f,avx512vl")]
35955#[cfg_attr(test, assert_instr(vexpandps))]
35956#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35957pub unsafe fn _mm_maskz_expandloadu_ps(k: __mmask8, mem_addr: *const f32) -> __m128 {
    _mm_mask_expandloadu_ps(_mm_setzero_ps(), k, mem_addr)
35959}
35960
35961/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35962///
35963/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_pd)
35964#[inline]
35965#[target_feature(enable = "avx512f")]
35966#[cfg_attr(test, assert_instr(vexpandpd))]
35967#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35968pub unsafe fn _mm512_mask_expandloadu_pd(
35969 src: __m512d,
35970 k: __mmask8,
35971 mem_addr: *const f64,
35972) -> __m512d {
    transmute(expandloadpd_512(mem_addr, src.as_f64x8(), k))
35974}
35975
35976/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35977///
35978/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_pd)
35979#[inline]
35980#[target_feature(enable = "avx512f")]
35981#[cfg_attr(test, assert_instr(vexpandpd))]
35982#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35983pub unsafe fn _mm512_maskz_expandloadu_pd(k: __mmask8, mem_addr: *const f64) -> __m512d {
    _mm512_mask_expandloadu_pd(_mm512_setzero_pd(), k, mem_addr)
35985}
35986
35987/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35988///
35989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_pd)
35990#[inline]
35991#[target_feature(enable = "avx512f,avx512vl")]
35992#[cfg_attr(test, assert_instr(vexpandpd))]
35993#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35994pub unsafe fn _mm256_mask_expandloadu_pd(
35995 src: __m256d,
35996 k: __mmask8,
35997 mem_addr: *const f64,
35998) -> __m256d {
    transmute(expandloadpd_256(mem_addr, src.as_f64x4(), k))
36000}
36001
36002/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
36003///
36004/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_pd)
36005#[inline]
36006#[target_feature(enable = "avx512f,avx512vl")]
36007#[cfg_attr(test, assert_instr(vexpandpd))]
36008#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36009pub unsafe fn _mm256_maskz_expandloadu_pd(k: __mmask8, mem_addr: *const f64) -> __m256d {
    _mm256_mask_expandloadu_pd(_mm256_setzero_pd(), k, mem_addr)
36011}
36012
36013/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
36014///
36015/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_pd)
36016#[inline]
36017#[target_feature(enable = "avx512f,avx512vl")]
36018#[cfg_attr(test, assert_instr(vexpandpd))]
36019#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36020pub unsafe fn _mm_mask_expandloadu_pd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
    transmute(expandloadpd_128(mem_addr, src.as_f64x2(), k))
36022}
36023
36024/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
36025///
36026/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_pd)
36027#[inline]
36028#[target_feature(enable = "avx512f,avx512vl")]
36029#[cfg_attr(test, assert_instr(vexpandpd))]
36030#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36031pub unsafe fn _mm_maskz_expandloadu_pd(k: __mmask8, mem_addr: *const f64) -> __m128d {
    _mm_mask_expandloadu_pd(_mm_setzero_pd(), k, mem_addr)
36033}
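
// Illustrative usage sketch (editorial addition, not part of the upstream stdarch
// source): the expand-load intrinsics read as many contiguous elements as there
// are set mask bits and scatter them, in order, into the mask-on lanes. The helper
// name is invented and the example assumes the host CPU supports AVX-512F at runtime.
#[cfg(test)]
#[allow(dead_code)]
fn expandloadu_usage_sketch() {
    // SAFETY: illustrative only; requires AVX-512F support at runtime.
    unsafe {
        let data: [f32; 2] = [10.0, 20.0];
        // Bits 1 and 3 are set, so exactly two elements are read from `data`
        // and placed into lanes 1 and 3; every other lane is zeroed (maskz).
        let v = _mm512_maskz_expandloadu_ps(0b0000_0000_0000_1010, data.as_ptr());
        let mut out = [0.0f32; 16];
        _mm512_storeu_ps(out.as_mut_ptr(), v);
        assert_eq!((out[0], out[1], out[3]), (0.0, 10.0, 20.0));
    }
}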
36034
36035/// Set packed double-precision (64-bit) floating-point elements in dst with the supplied values in reverse order.
36036///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_pd&expand=5002)
36038#[inline]
36039#[target_feature(enable = "avx512f")]
36040#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36041pub fn _mm512_setr_pd(
36042 e0: f64,
36043 e1: f64,
36044 e2: f64,
36045 e3: f64,
36046 e4: f64,
36047 e5: f64,
36048 e6: f64,
36049 e7: f64,
36050) -> __m512d {
36051 unsafe {
        let r: f64x8 = f64x8::new(e0, e1, e2, e3, e4, e5, e6, e7);
        transmute(r)
36054 }
36055}
36056
36057/// Set packed double-precision (64-bit) floating-point elements in dst with the supplied values.
36058///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_pd&expand=4924)
36060#[inline]
36061#[target_feature(enable = "avx512f")]
36062#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36063pub fn _mm512_set_pd(
36064 e0: f64,
36065 e1: f64,
36066 e2: f64,
36067 e3: f64,
36068 e4: f64,
36069 e5: f64,
36070 e6: f64,
36071 e7: f64,
36072) -> __m512d {
    _mm512_setr_pd(e7, e6, e5, e4, e3, e2, e1, e0)
36074}
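
// Illustrative ordering sketch (editorial addition, not upstream source): shows that
// `_mm512_set_pd` takes its arguments highest lane first while `_mm512_setr_pd` takes
// them in lane (memory) order. The helper name is invented; AVX-512F support at
// runtime is assumed.
#[cfg(test)]
#[allow(dead_code)]
fn set_pd_ordering_sketch() {
    // SAFETY: illustrative only; requires AVX-512F support at runtime.
    unsafe {
        // `setr` stores its first argument into lane 0; `set` stores it into lane 7.
        let a = _mm512_setr_pd(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        let b = _mm512_set_pd(7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 0.0);
        let mut lanes_a = [0.0f64; 8];
        let mut lanes_b = [0.0f64; 8];
        _mm512_storeu_pd(lanes_a.as_mut_ptr(), a);
        _mm512_storeu_pd(lanes_b.as_mut_ptr(), b);
        // Both spell out the same vector [0.0, 1.0, ..., 7.0].
        assert_eq!(lanes_a, lanes_b);
    }
}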
36075
36076/// Move the lower single-precision (32-bit) floating-point element from b to the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36077///
36078/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_move_ss&expand=3832)
36079#[inline]
36080#[target_feature(enable = "avx512f")]
36081#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36082#[cfg_attr(test, assert_instr(vmovss))]
36083pub fn _mm_mask_move_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36084 unsafe {
36085 let extractsrc: f32 = simd_extract!(src, 0);
36086 let mut mov: f32 = extractsrc;
36087 if (k & 0b00000001) != 0 {
36088 mov = simd_extract!(b, 0);
36089 }
36090 simd_insert!(a, 0, mov)
36091 }
36092}
36093
36094/// Move the lower single-precision (32-bit) floating-point element from b to the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36095///
36096/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_move_ss&expand=3833)
36097#[inline]
36098#[target_feature(enable = "avx512f")]
36099#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36100#[cfg_attr(test, assert_instr(vmovss))]
36101pub fn _mm_maskz_move_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36102 unsafe {
36103 let mut mov: f32 = 0.;
36104 if (k & 0b00000001) != 0 {
36105 mov = simd_extract!(b, 0);
36106 }
36107 simd_insert!(a, 0, mov)
36108 }
36109}
36110
36111/// Move the lower double-precision (64-bit) floating-point element from b to the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36112///
36113/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_move_sd&expand=3829)
36114#[inline]
36115#[target_feature(enable = "avx512f")]
36116#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36117#[cfg_attr(test, assert_instr(vmovsd))]
36118pub fn _mm_mask_move_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36119 unsafe {
36120 let extractsrc: f64 = simd_extract!(src, 0);
36121 let mut mov: f64 = extractsrc;
36122 if (k & 0b00000001) != 0 {
36123 mov = simd_extract!(b, 0);
36124 }
36125 simd_insert!(a, 0, mov)
36126 }
36127}
36128
36129/// Move the lower double-precision (64-bit) floating-point element from b to the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36130///
36131/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_move_sd&expand=3830)
36132#[inline]
36133#[target_feature(enable = "avx512f")]
36134#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36135#[cfg_attr(test, assert_instr(vmovsd))]
36136pub fn _mm_maskz_move_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36137 unsafe {
36138 let mut mov: f64 = 0.;
36139 if (k & 0b00000001) != 0 {
36140 mov = simd_extract!(b, 0);
36141 }
36142 simd_insert!(a, 0, mov)
36143 }
36144}
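
// Illustrative usage sketch (editorial addition, not upstream source): demonstrates the
// writemask behaviour of `_mm_mask_move_ss`. The helper name is invented; AVX-512F
// support at runtime is assumed.
#[cfg(test)]
#[allow(dead_code)]
fn mask_move_ss_sketch() {
    // SAFETY: illustrative only; requires AVX-512F support at runtime.
    unsafe {
        let src = _mm_set_ps(0.0, 0.0, 0.0, -1.0); // lane 0 of `src` is -1.0
        let a = _mm_set_ps(4.0, 3.0, 2.0, 1.0);
        let b = _mm_set_ps(8.0, 7.0, 6.0, 5.0); // lane 0 of `b` is 5.0
        // Mask bit 0 set: lane 0 of the result comes from `b`.
        assert_eq!(_mm_cvtss_f32(_mm_mask_move_ss(src, 0b1, a, b)), 5.0);
        // Mask bit 0 clear: lane 0 falls back to `src`; lanes 1..3 still come from `a`.
        assert_eq!(_mm_cvtss_f32(_mm_mask_move_ss(src, 0b0, a, b)), -1.0);
    }
}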
36145
36146/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36147///
36148/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_add_ss&expand=159)
36149#[inline]
36150#[target_feature(enable = "avx512f")]
36151#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36152#[cfg_attr(test, assert_instr(vaddss))]
36153pub fn _mm_mask_add_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36154 unsafe {
36155 let extractsrc: f32 = simd_extract!(src, 0);
36156 let mut add: f32 = extractsrc;
36157 if (k & 0b00000001) != 0 {
36158 let extracta: f32 = simd_extract!(a, 0);
36159 let extractb: f32 = simd_extract!(b, 0);
36160 add = extracta + extractb;
36161 }
36162 simd_insert!(a, 0, add)
36163 }
36164}
36165
36166/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36167///
36168/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_add_ss&expand=160)
36169#[inline]
36170#[target_feature(enable = "avx512f")]
36171#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36172#[cfg_attr(test, assert_instr(vaddss))]
36173pub fn _mm_maskz_add_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36174 unsafe {
36175 let mut add: f32 = 0.;
36176 if (k & 0b00000001) != 0 {
36177 let extracta: f32 = simd_extract!(a, 0);
36178 let extractb: f32 = simd_extract!(b, 0);
36179 add = extracta + extractb;
36180 }
36181 simd_insert!(a, 0, add)
36182 }
36183}
36184
36185/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36186///
36187/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_add_sd&expand=155)
36188#[inline]
36189#[target_feature(enable = "avx512f")]
36190#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36191#[cfg_attr(test, assert_instr(vaddsd))]
36192pub fn _mm_mask_add_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36193 unsafe {
36194 let extractsrc: f64 = simd_extract!(src, 0);
36195 let mut add: f64 = extractsrc;
36196 if (k & 0b00000001) != 0 {
36197 let extracta: f64 = simd_extract!(a, 0);
36198 let extractb: f64 = simd_extract!(b, 0);
36199 add = extracta + extractb;
36200 }
36201 simd_insert!(a, 0, add)
36202 }
36203}
36204
36205/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36206///
36207/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_add_sd&expand=156)
36208#[inline]
36209#[target_feature(enable = "avx512f")]
36210#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36211#[cfg_attr(test, assert_instr(vaddsd))]
36212pub fn _mm_maskz_add_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36213 unsafe {
36214 let mut add: f64 = 0.;
36215 if (k & 0b00000001) != 0 {
36216 let extracta: f64 = simd_extract!(a, 0);
36217 let extractb: f64 = simd_extract!(b, 0);
36218 add = extracta + extractb;
36219 }
36220 simd_insert!(a, 0, add)
36221 }
36222}
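
// Illustrative usage sketch (editorial addition, not upstream source): contrasts the
// writemask and zeromask variants of the masked scalar add. The same pattern applies
// to the sub/mul/div intrinsics below. The helper name is invented; AVX-512F support
// at runtime is assumed.
#[cfg(test)]
#[allow(dead_code)]
fn mask_add_ss_sketch() {
    // SAFETY: illustrative only; requires AVX-512F support at runtime.
    unsafe {
        let src = _mm_set1_ps(-1.0);
        let a = _mm_set_ps(4.0, 3.0, 2.0, 1.0); // lane 0 = 1.0
        let b = _mm_set_ps(40.0, 30.0, 20.0, 10.0); // lane 0 = 10.0
        // Writemask set: lane 0 = 1.0 + 10.0.
        assert_eq!(_mm_cvtss_f32(_mm_mask_add_ss(src, 0b1, a, b)), 11.0);
        // Writemask clear: lane 0 is copied from `src`.
        assert_eq!(_mm_cvtss_f32(_mm_mask_add_ss(src, 0b0, a, b)), -1.0);
        // Zeromask clear: lane 0 is zeroed instead.
        assert_eq!(_mm_cvtss_f32(_mm_maskz_add_ss(0b0, a, b)), 0.0);
    }
}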
36223
36224/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36225///
36226/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sub_ss&expand=5750)
36227#[inline]
36228#[target_feature(enable = "avx512f")]
36229#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36230#[cfg_attr(test, assert_instr(vsubss))]
36231pub fn _mm_mask_sub_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36232 unsafe {
        let extractsrc: f32 = simd_extract!(src, 0);
        let mut sub: f32 = extractsrc;
        if (k & 0b00000001) != 0 {
            let extracta: f32 = simd_extract!(a, 0);
            let extractb: f32 = simd_extract!(b, 0);
            sub = extracta - extractb;
        }
        simd_insert!(a, 0, sub)
36241 }
36242}
36243
36244/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36245///
36246/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sub_ss&expand=5751)
36247#[inline]
36248#[target_feature(enable = "avx512f")]
36249#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36250#[cfg_attr(test, assert_instr(vsubss))]
36251pub fn _mm_maskz_sub_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36252 unsafe {
        let mut sub: f32 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f32 = simd_extract!(a, 0);
            let extractb: f32 = simd_extract!(b, 0);
            sub = extracta - extractb;
        }
        simd_insert!(a, 0, sub)
36260 }
36261}
36262
36263/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36264///
36265/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sub_sd&expand=5746)
36266#[inline]
36267#[target_feature(enable = "avx512f")]
36268#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36269#[cfg_attr(test, assert_instr(vsubsd))]
36270pub fn _mm_mask_sub_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36271 unsafe {
        let extractsrc: f64 = simd_extract!(src, 0);
        let mut sub: f64 = extractsrc;
        if (k & 0b00000001) != 0 {
            let extracta: f64 = simd_extract!(a, 0);
            let extractb: f64 = simd_extract!(b, 0);
            sub = extracta - extractb;
        }
        simd_insert!(a, 0, sub)
36280 }
36281}
36282
36283/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36284///
36285/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sub_sd&expand=5747)
36286#[inline]
36287#[target_feature(enable = "avx512f")]
36288#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36289#[cfg_attr(test, assert_instr(vsubsd))]
36290pub fn _mm_maskz_sub_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36291 unsafe {
        let mut sub: f64 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f64 = simd_extract!(a, 0);
            let extractb: f64 = simd_extract!(b, 0);
            sub = extracta - extractb;
        }
        simd_insert!(a, 0, sub)
36299 }
36300}
36301
36302/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36303///
36304/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_mul_ss&expand=3950)
36305#[inline]
36306#[target_feature(enable = "avx512f")]
36307#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36308#[cfg_attr(test, assert_instr(vmulss))]
36309pub fn _mm_mask_mul_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36310 unsafe {
        let extractsrc: f32 = simd_extract!(src, 0);
        let mut mul: f32 = extractsrc;
        if (k & 0b00000001) != 0 {
            let extracta: f32 = simd_extract!(a, 0);
            let extractb: f32 = simd_extract!(b, 0);
            mul = extracta * extractb;
        }
        simd_insert!(a, 0, mul)
36319 }
36320}
36321
36322/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36323///
36324/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_mul_ss&expand=3951)
36325#[inline]
36326#[target_feature(enable = "avx512f")]
36327#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36328#[cfg_attr(test, assert_instr(vmulss))]
36329pub fn _mm_maskz_mul_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36330 unsafe {
        let mut mul: f32 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f32 = simd_extract!(a, 0);
            let extractb: f32 = simd_extract!(b, 0);
            mul = extracta * extractb;
        }
        simd_insert!(a, 0, mul)
36338 }
36339}
36340
36341/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36342///
36343/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_mul_sd&expand=3947)
36344#[inline]
36345#[target_feature(enable = "avx512f")]
36346#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36347#[cfg_attr(test, assert_instr(vmulsd))]
36348pub fn _mm_mask_mul_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36349 unsafe {
        let extractsrc: f64 = simd_extract!(src, 0);
        let mut mul: f64 = extractsrc;
        if (k & 0b00000001) != 0 {
            let extracta: f64 = simd_extract!(a, 0);
            let extractb: f64 = simd_extract!(b, 0);
            mul = extracta * extractb;
        }
        simd_insert!(a, 0, mul)
36358 }
36359}
36360
36361/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36362///
36363/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_mul_sd&expand=3948)
36364#[inline]
36365#[target_feature(enable = "avx512f")]
36366#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36367#[cfg_attr(test, assert_instr(vmulsd))]
36368pub fn _mm_maskz_mul_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36369 unsafe {
        let mut mul: f64 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f64 = simd_extract!(a, 0);
            let extractb: f64 = simd_extract!(b, 0);
            mul = extracta * extractb;
        }
        simd_insert!(a, 0, mul)
36377 }
36378}
36379
36380/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36381///
36382/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_div_ss&expand=2181)
36383#[inline]
36384#[target_feature(enable = "avx512f")]
36385#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36386#[cfg_attr(test, assert_instr(vdivss))]
36387pub fn _mm_mask_div_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36388 unsafe {
        let extractsrc: f32 = simd_extract!(src, 0);
        let mut div: f32 = extractsrc;
        if (k & 0b00000001) != 0 {
            let extracta: f32 = simd_extract!(a, 0);
            let extractb: f32 = simd_extract!(b, 0);
            div = extracta / extractb;
        }
        simd_insert!(a, 0, div)
36397 }
36398}
36399
36400/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36401///
36402/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_div_ss&expand=2182)
36403#[inline]
36404#[target_feature(enable = "avx512f")]
36405#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36406#[cfg_attr(test, assert_instr(vdivss))]
36407pub fn _mm_maskz_div_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36408 unsafe {
        let mut div: f32 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f32 = simd_extract!(a, 0);
            let extractb: f32 = simd_extract!(b, 0);
            div = extracta / extractb;
        }
        simd_insert!(a, 0, div)
36416 }
36417}
36418
36419/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36420///
36421/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_div_sd&expand=2178)
36422#[inline]
36423#[target_feature(enable = "avx512f")]
36424#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36425#[cfg_attr(test, assert_instr(vdivsd))]
36426pub fn _mm_mask_div_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36427 unsafe {
        let extractsrc: f64 = simd_extract!(src, 0);
        let mut div: f64 = extractsrc;
        if (k & 0b00000001) != 0 {
            let extracta: f64 = simd_extract!(a, 0);
            let extractb: f64 = simd_extract!(b, 0);
            div = extracta / extractb;
        }
        simd_insert!(a, 0, div)
36436 }
36437}
36438
36439/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36440///
36441/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_div_sd&expand=2179)
36442#[inline]
36443#[target_feature(enable = "avx512f")]
36444#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36445#[cfg_attr(test, assert_instr(vdivsd))]
36446pub fn _mm_maskz_div_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36447 unsafe {
        let mut div: f64 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f64 = simd_extract!(a, 0);
            let extractb: f64 = simd_extract!(b, 0);
            div = extracta / extractb;
        }
        simd_insert!(a, 0, div)
36455 }
36456}
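
// Illustrative usage sketch (editorial addition, not upstream source): the zeromask
// scalar divide, showing the mask-clear case zeroing lane 0 while the upper lane is
// always copied from `a`. The helper name is invented; AVX-512F support at runtime
// is assumed.
#[cfg(test)]
#[allow(dead_code)]
fn maskz_div_sd_sketch() {
    // SAFETY: illustrative only; requires AVX-512F support at runtime.
    unsafe {
        let a = _mm_set_pd(3.0, 12.0); // lane 0 = 12.0, lane 1 = 3.0
        let b = _mm_set_pd(9.0, 4.0); // lane 0 = 4.0
        // Mask bit 0 set: lane 0 = 12.0 / 4.0; lane 1 is copied from `a`.
        assert_eq!(_mm_cvtsd_f64(_mm_maskz_div_sd(0b1, a, b)), 3.0);
        // Mask bit 0 clear: lane 0 is zeroed.
        assert_eq!(_mm_cvtsd_f64(_mm_maskz_div_sd(0b0, a, b)), 0.0);
    }
}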
36457
36458/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36459///
36460/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_max_ss&expand=3672)
36461#[inline]
36462#[target_feature(enable = "avx512f")]
36463#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36464#[cfg_attr(test, assert_instr(vmaxss))]
36465pub fn _mm_mask_max_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36466 unsafe {
        transmute(vmaxss(
            a.as_f32x4(),
            b.as_f32x4(),
            src.as_f32x4(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
36474 }
36475}
36476
36477/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36478///
36479/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_max_ss&expand=3673)
36480#[inline]
36481#[target_feature(enable = "avx512f")]
36482#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36483#[cfg_attr(test, assert_instr(vmaxss))]
36484pub fn _mm_maskz_max_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36485 unsafe {
        transmute(vmaxss(
            a.as_f32x4(),
            b.as_f32x4(),
            f32x4::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
36493 }
36494}
36495
36496/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36497///
36498/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_max_sd&expand=3669)
36499#[inline]
36500#[target_feature(enable = "avx512f")]
36501#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36502#[cfg_attr(test, assert_instr(vmaxsd))]
36503pub fn _mm_mask_max_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36504 unsafe {
        transmute(vmaxsd(
            a.as_f64x2(),
            b.as_f64x2(),
            src.as_f64x2(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
36512 }
36513}
36514
36515/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36516///
36517/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_max_sd&expand=3670)
36518#[inline]
36519#[target_feature(enable = "avx512f")]
36520#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36521#[cfg_attr(test, assert_instr(vmaxsd))]
36522pub fn _mm_maskz_max_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36523 unsafe {
        transmute(vmaxsd(
            a.as_f64x2(),
            b.as_f64x2(),
            f64x2::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
36531 }
36532}
36533
36534/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36535///
36536/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_min_ss&expand=3786)
36537#[inline]
36538#[target_feature(enable = "avx512f")]
36539#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36540#[cfg_attr(test, assert_instr(vminss))]
36541pub fn _mm_mask_min_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36542 unsafe {
        transmute(vminss(
            a.as_f32x4(),
            b.as_f32x4(),
            src.as_f32x4(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
36550 }
36551}
36552
36553/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36554///
36555/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_min_ss&expand=3787)
36556#[inline]
36557#[target_feature(enable = "avx512f")]
36558#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36559#[cfg_attr(test, assert_instr(vminss))]
36560pub fn _mm_maskz_min_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36561 unsafe {
        transmute(vminss(
            a.as_f32x4(),
            b.as_f32x4(),
            f32x4::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
36569 }
36570}
36571
36572/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36573///
36574/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_min_sd&expand=3783)
36575#[inline]
36576#[target_feature(enable = "avx512f")]
36577#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36578#[cfg_attr(test, assert_instr(vminsd))]
36579pub fn _mm_mask_min_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36580 unsafe {
        transmute(vminsd(
            a.as_f64x2(),
            b.as_f64x2(),
            src.as_f64x2(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
36588 }
36589}
36590
36591/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36592///
36593/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_min_sd&expand=3784)
36594#[inline]
36595#[target_feature(enable = "avx512f")]
36596#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36597#[cfg_attr(test, assert_instr(vminsd))]
36598pub fn _mm_maskz_min_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36599 unsafe {
        transmute(vminsd(
            a.as_f64x2(),
            b.as_f64x2(),
            f64x2::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
36607 }
36608}
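
// Illustrative usage sketch (editorial addition, not upstream source): the masked
// scalar maximum; the minimum variants behave symmetrically. The helper name is
// invented; AVX-512F support at runtime is assumed.
#[cfg(test)]
#[allow(dead_code)]
fn mask_max_ss_sketch() {
    // SAFETY: illustrative only; requires AVX-512F support at runtime.
    unsafe {
        let src = _mm_set1_ps(-1.0);
        let a = _mm_set_ps(0.0, 0.0, 0.0, 2.0); // lane 0 = 2.0
        let b = _mm_set_ps(0.0, 0.0, 0.0, 7.0); // lane 0 = 7.0
        // Mask set: lane 0 holds max(2.0, 7.0).
        assert_eq!(_mm_cvtss_f32(_mm_mask_max_ss(src, 0b1, a, b)), 7.0);
        // Mask clear: lane 0 is taken from `src` instead.
        assert_eq!(_mm_cvtss_f32(_mm_mask_max_ss(src, 0b0, a, b)), -1.0);
    }
}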
36609
36610/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36611///
36612/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sqrt_ss&expand=5387)
36613#[inline]
36614#[target_feature(enable = "avx512f")]
36615#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36616#[cfg_attr(test, assert_instr(vsqrtss))]
36617pub fn _mm_mask_sqrt_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe { vsqrtss(a, b, src, k, _MM_FROUND_CUR_DIRECTION) }
36619}
36620
36621/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36622///
36623/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sqrt_ss&expand=5388)
36624#[inline]
36625#[target_feature(enable = "avx512f")]
36626#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36627#[cfg_attr(test, assert_instr(vsqrtss))]
36628pub fn _mm_maskz_sqrt_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe { vsqrtss(a, b, _mm_setzero_ps(), k, _MM_FROUND_CUR_DIRECTION) }
36630}
36631
36632/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36633///
36634/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sqrt_sd&expand=5384)
36635#[inline]
36636#[target_feature(enable = "avx512f")]
36637#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36638#[cfg_attr(test, assert_instr(vsqrtsd))]
36639pub fn _mm_mask_sqrt_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe { vsqrtsd(a, b, src, k, _MM_FROUND_CUR_DIRECTION) }
36641}
36642
36643/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36644///
36645/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sqrt_sd&expand=5385)
36646#[inline]
36647#[target_feature(enable = "avx512f")]
36648#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36649#[cfg_attr(test, assert_instr(vsqrtsd))]
36650pub fn _mm_maskz_sqrt_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe { vsqrtsd(a, b, _mm_setzero_pd(), k, _MM_FROUND_CUR_DIRECTION) }
36652}
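
// Illustrative usage sketch (editorial addition, not upstream source): the zeromask
// scalar square root. The helper name is invented; AVX-512F support at runtime is
// assumed.
#[cfg(test)]
#[allow(dead_code)]
fn maskz_sqrt_ss_sketch() {
    // SAFETY: illustrative only; requires AVX-512F support at runtime.
    unsafe {
        let a = _mm_set_ps(4.0, 3.0, 2.0, 1.0);
        let b = _mm_set_ps(0.0, 0.0, 0.0, 9.0); // lane 0 = 9.0
        // Mask set: lane 0 = sqrt(9.0); lanes 1..3 come from `a`.
        assert_eq!(_mm_cvtss_f32(_mm_maskz_sqrt_ss(0b1, a, b)), 3.0);
        // Mask clear: lane 0 is zeroed (maskz variant).
        assert_eq!(_mm_cvtss_f32(_mm_maskz_sqrt_ss(0b0, a, b)), 0.0);
    }
}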
36653
36654/// Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
36655///
36656/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_rsqrt14_ss&expand=4825)
36657#[inline]
36658#[target_feature(enable = "avx512f")]
36659#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36660#[cfg_attr(test, assert_instr(vrsqrt14ss))]
36661pub fn _mm_rsqrt14_ss(a: __m128, b: __m128) -> __m128 {
    unsafe { transmute(vrsqrt14ss(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, 0b1)) }
36663}
36664
36665/// Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
36666///
36667/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_rsqrt14_ss&expand=4823)
36668#[inline]
36669#[target_feature(enable = "avx512f")]
36670#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36671#[cfg_attr(test, assert_instr(vrsqrt14ss))]
36672pub fn _mm_mask_rsqrt14_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe { transmute(vrsqrt14ss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k)) }
36674}
36675
36676/// Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
36677///
36678/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_rsqrt14_ss&expand=4824)
36679#[inline]
36680#[target_feature(enable = "avx512f")]
36681#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36682#[cfg_attr(test, assert_instr(vrsqrt14ss))]
36683pub fn _mm_maskz_rsqrt14_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe { transmute(vrsqrt14ss(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, k)) }
36685}
36686
36687/// Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
36688///
36689/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_rsqrt14_sd&expand=4822)
36690#[inline]
36691#[target_feature(enable = "avx512f")]
36692#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36693#[cfg_attr(test, assert_instr(vrsqrt14sd))]
36694pub fn _mm_rsqrt14_sd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { transmute(vrsqrt14sd(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, 0b1)) }
36696}
36697
36698/// Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
36699///
36700/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_rsqrt14_sd&expand=4820)
36701#[inline]
36702#[target_feature(enable = "avx512f")]
36703#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36704#[cfg_attr(test, assert_instr(vrsqrt14sd))]
36705pub fn _mm_mask_rsqrt14_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe { transmute(vrsqrt14sd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k)) }
36707}
36708
36709/// Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
36710///
36711/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_rsqrt14_sd&expand=4821)
36712#[inline]
36713#[target_feature(enable = "avx512f")]
36714#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36715#[cfg_attr(test, assert_instr(vrsqrt14sd))]
36716pub fn _mm_maskz_rsqrt14_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe { transmute(vrsqrt14sd(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, k)) }
36718}
36719
36720/// Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
36721///
36722/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_rcp14_ss&expand=4508)
36723#[inline]
36724#[target_feature(enable = "avx512f")]
36725#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36726#[cfg_attr(test, assert_instr(vrcp14ss))]
36727pub fn _mm_rcp14_ss(a: __m128, b: __m128) -> __m128 {
    unsafe { transmute(vrcp14ss(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, 0b1)) }
36729}
36730
36731/// Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
36732///
36733/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_rcp14_ss&expand=4506)
36734#[inline]
36735#[target_feature(enable = "avx512f")]
36736#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36737#[cfg_attr(test, assert_instr(vrcp14ss))]
36738pub fn _mm_mask_rcp14_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe { transmute(vrcp14ss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k)) }
36740}
36741
36742/// Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
36743///
36744/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_rcp14_ss&expand=4507)
36745#[inline]
36746#[target_feature(enable = "avx512f")]
36747#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36748#[cfg_attr(test, assert_instr(vrcp14ss))]
36749pub fn _mm_maskz_rcp14_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe { transmute(vrcp14ss(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, k)) }
36751}
36752
36753/// Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
36754///
36755/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_rcp14_sd&expand=4505)
36756#[inline]
36757#[target_feature(enable = "avx512f")]
36758#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36759#[cfg_attr(test, assert_instr(vrcp14sd))]
36760pub fn _mm_rcp14_sd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { transmute(vrcp14sd(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, 0b1)) }
36762}
36763
36764/// Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
36765///
36766/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_rcp14_sd&expand=4503)
36767#[inline]
36768#[target_feature(enable = "avx512f")]
36769#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36770#[cfg_attr(test, assert_instr(vrcp14sd))]
36771pub fn _mm_mask_rcp14_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe { transmute(vrcp14sd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k)) }
36773}
36774
36775/// Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
36776///
36777/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_rcp14_sd&expand=4504)
36778#[inline]
36779#[target_feature(enable = "avx512f")]
36780#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36781#[cfg_attr(test, assert_instr(vrcp14sd))]
36782pub fn _mm_maskz_rcp14_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe { transmute(vrcp14sd(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, k)) }
36784}
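
// Illustrative usage sketch (editorial addition, not upstream source): the rcp14 and
// rsqrt14 intrinsics are approximations with at most about 2^-14 relative error, so
// results should be compared against a tolerance rather than for exact equality. The
// helper name is invented; AVX-512F support at runtime is assumed.
#[cfg(test)]
#[allow(dead_code)]
fn rcp14_ss_sketch() {
    // SAFETY: illustrative only; requires AVX-512F support at runtime.
    unsafe {
        let a = _mm_set1_ps(0.0);
        let b = _mm_set_ps(0.0, 0.0, 0.0, 8.0); // lane 0 = 8.0
        // Approximate reciprocal of 8.0, close to 0.125 but not necessarily exact.
        let r = _mm_cvtss_f32(_mm_rcp14_ss(a, b));
        assert!(r > 0.1249 && r < 0.1251);
    }
}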
36785
36786/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
36787///
36788/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getexp_ss&expand=2862)
36789#[inline]
36790#[target_feature(enable = "avx512f")]
36791#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36792#[cfg_attr(test, assert_instr(vgetexpss))]
36793pub fn _mm_getexp_ss(a: __m128, b: __m128) -> __m128 {
36794 unsafe {
        transmute(vgetexpss(
            a.as_f32x4(),
            b.as_f32x4(),
            f32x4::ZERO,
            0b1,
            _MM_FROUND_NO_EXC,
        ))
36802 }
36803}
36804
36805/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
36806///
36807/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getexp_ss&expand=2863)
36808#[inline]
36809#[target_feature(enable = "avx512f")]
36810#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36811#[cfg_attr(test, assert_instr(vgetexpss))]
36812pub fn _mm_mask_getexp_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36813 unsafe {
        transmute(vgetexpss(
            a.as_f32x4(),
            b.as_f32x4(),
            src.as_f32x4(),
            k,
            _MM_FROUND_NO_EXC,
        ))
36821 }
36822}
36823
36824/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
36825///
36826/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getexp_ss&expand=2864)
36827#[inline]
36828#[target_feature(enable = "avx512f")]
36829#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36830#[cfg_attr(test, assert_instr(vgetexpss))]
36831pub fn _mm_maskz_getexp_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36832 unsafe {
        transmute(vgetexpss(
            a.as_f32x4(),
            b.as_f32x4(),
            f32x4::ZERO,
            k,
            _MM_FROUND_NO_EXC,
        ))
36840 }
36841}
36842
36843/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
36844///
36845/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getexp_sd&expand=2859)
36846#[inline]
36847#[target_feature(enable = "avx512f")]
36848#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36849#[cfg_attr(test, assert_instr(vgetexpsd))]
36850pub fn _mm_getexp_sd(a: __m128d, b: __m128d) -> __m128d {
36851 unsafe {
        transmute(vgetexpsd(
            a.as_f64x2(),
            b.as_f64x2(),
            f64x2::ZERO,
            0b1,
            _MM_FROUND_NO_EXC,
        ))
36859 }
36860}
36861
36862/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
36863///
36864/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getexp_sd&expand=2860)
36865#[inline]
36866#[target_feature(enable = "avx512f")]
36867#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36868#[cfg_attr(test, assert_instr(vgetexpsd))]
36869pub fn _mm_mask_getexp_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36870 unsafe {
        transmute(vgetexpsd(
            a.as_f64x2(),
            b.as_f64x2(),
            src.as_f64x2(),
            k,
            _MM_FROUND_NO_EXC,
        ))
36878 }
36879}
36880
36881/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
36882///
36883/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getexp_sd&expand=2861)
36884#[inline]
36885#[target_feature(enable = "avx512f")]
36886#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36887#[cfg_attr(test, assert_instr(vgetexpsd))]
36888pub fn _mm_maskz_getexp_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36889 unsafe {
        transmute(vgetexpsd(
            a.as_f64x2(),
            b.as_f64x2(),
            f64x2::ZERO,
            k,
            _MM_FROUND_NO_EXC,
        ))
36897 }
36898}
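
// Illustrative usage sketch (editorial addition, not upstream source): getexp returns
// floor(log2(|x|)) of the lower element as a floating-point value. The helper name is
// invented; AVX-512F support at runtime is assumed.
#[cfg(test)]
#[allow(dead_code)]
fn getexp_ss_sketch() {
    // SAFETY: illustrative only; requires AVX-512F support at runtime.
    unsafe {
        let a = _mm_set1_ps(0.0);
        let b = _mm_set_ps(0.0, 0.0, 0.0, 10.0); // lane 0 = 10.0
        // 10.0 = 1.25 * 2^3, so the extracted exponent is 3.0.
        assert_eq!(_mm_cvtss_f32(_mm_getexp_ss(a, b)), 3.0);
    }
}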
36899
36900/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
36901/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
36902/// _MM_MANT_NORM_1_2 // interval [1, 2)\
36903/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
36904/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
36905/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
36906/// The sign is determined by sc which can take the following values:\
36907/// _MM_MANT_SIGN_src // sign = sign(src)\
36908/// _MM_MANT_SIGN_zero // sign = 0\
36909/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
36910/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
36911///
36912/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getmant_ss&expand=2898)
36913#[inline]
36914#[target_feature(enable = "avx512f")]
36915#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36916#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0))]
36917#[rustc_legacy_const_generics(2, 3)]
36918pub fn _mm_getmant_ss<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
36919 a: __m128,
36920 b: __m128,
36921) -> __m128 {
36922 unsafe {
36923 static_assert_uimm_bits!(NORM, 4);
36924 static_assert_uimm_bits!(SIGN, 2);
36925 let a: f32x4 = a.as_f32x4();
36926 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vgetmantss(
            a,
            b,
            SIGN << 2 | NORM,
            f32x4::ZERO,
            0b1,
            _MM_FROUND_CUR_DIRECTION,
        );
        transmute(r)
36936 }
36937}
36938
36939/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
36940/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
36941/// _MM_MANT_NORM_1_2 // interval [1, 2)\
36942/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
36943/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
36944/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
36945/// The sign is determined by sc which can take the following values:\
36946/// _MM_MANT_SIGN_src // sign = sign(src)\
36947/// _MM_MANT_SIGN_zero // sign = 0\
36948/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
36949/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
36950///
36951/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getmant_ss&expand=2899)
36952#[inline]
36953#[target_feature(enable = "avx512f")]
36954#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36955#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0))]
36956#[rustc_legacy_const_generics(4, 5)]
36957pub fn _mm_mask_getmant_ss<
36958 const NORM: _MM_MANTISSA_NORM_ENUM,
36959 const SIGN: _MM_MANTISSA_SIGN_ENUM,
36960>(
36961 src: __m128,
36962 k: __mmask8,
36963 a: __m128,
36964 b: __m128,
36965) -> __m128 {
36966 unsafe {
36967 static_assert_uimm_bits!(NORM, 4);
36968 static_assert_uimm_bits!(SIGN, 2);
36969 let a: f32x4 = a.as_f32x4();
36970 let b: f32x4 = b.as_f32x4();
36971 let src: f32x4 = src.as_f32x4();
        let r: f32x4 = vgetmantss(a, b, SIGN << 2 | NORM, src, k, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
36974 }
36975}
36976
36977/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
36978/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
36979/// _MM_MANT_NORM_1_2 // interval [1, 2)\
36980/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
36981/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
36982/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
36983/// The sign is determined by sc which can take the following values:\
36984/// _MM_MANT_SIGN_src // sign = sign(src)\
36985/// _MM_MANT_SIGN_zero // sign = 0\
36986/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
36987/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
36988///
36989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getmant_ss&expand=2900)
36990#[inline]
36991#[target_feature(enable = "avx512f")]
36992#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36993#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0))]
36994#[rustc_legacy_const_generics(3, 4)]
36995pub fn _mm_maskz_getmant_ss<
36996 const NORM: _MM_MANTISSA_NORM_ENUM,
36997 const SIGN: _MM_MANTISSA_SIGN_ENUM,
36998>(
36999 k: __mmask8,
37000 a: __m128,
37001 b: __m128,
37002) -> __m128 {
37003 unsafe {
37004 static_assert_uimm_bits!(NORM, 4);
37005 static_assert_uimm_bits!(SIGN, 2);
37006 let a: f32x4 = a.as_f32x4();
37007 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vgetmantss(
            a,
            b,
            SIGN << 2 | NORM,
            f32x4::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        );
        transmute(r)
37017 }
37018}
37019
37020/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
37021/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
37022/// _MM_MANT_NORM_1_2 // interval [1, 2)\
37023/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
37024/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
37025/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
37026/// The sign is determined by sc which can take the following values:\
37027/// _MM_MANT_SIGN_src // sign = sign(src)\
37028/// _MM_MANT_SIGN_zero // sign = 0\
37029/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
37030/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37031///
37032/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getmant_sd&expand=2895)
37033#[inline]
37034#[target_feature(enable = "avx512f")]
37035#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37036#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0))]
37037#[rustc_legacy_const_generics(2, 3)]
37038pub fn _mm_getmant_sd<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
37039 a: __m128d,
37040 b: __m128d,
37041) -> __m128d {
37042 unsafe {
37043 static_assert_uimm_bits!(NORM, 4);
37044 static_assert_uimm_bits!(SIGN, 2);
37045 let a: f64x2 = a.as_f64x2();
37046 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vgetmantsd(
            a,
            b,
            SIGN << 2 | NORM,
            f64x2::ZERO,
            0b1,
            _MM_FROUND_CUR_DIRECTION,
        );
        transmute(r)
37056 }
37057}
37058
37059/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
37060/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
37061/// _MM_MANT_NORM_1_2 // interval [1, 2)\
37062/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
37063/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
37064/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
37065/// The sign is determined by sc which can take the following values:\
37066/// _MM_MANT_SIGN_src // sign = sign(src)\
37067/// _MM_MANT_SIGN_zero // sign = 0\
37068/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
37069/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37070///
37071/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getmant_sd&expand=2896)
37072#[inline]
37073#[target_feature(enable = "avx512f")]
37074#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37075#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0))]
37076#[rustc_legacy_const_generics(4, 5)]
37077pub fn _mm_mask_getmant_sd<
37078 const NORM: _MM_MANTISSA_NORM_ENUM,
37079 const SIGN: _MM_MANTISSA_SIGN_ENUM,
37080>(
37081 src: __m128d,
37082 k: __mmask8,
37083 a: __m128d,
37084 b: __m128d,
37085) -> __m128d {
37086 unsafe {
37087 static_assert_uimm_bits!(NORM, 4);
37088 static_assert_uimm_bits!(SIGN, 2);
37089 let a: f64x2 = a.as_f64x2();
37090 let b: f64x2 = b.as_f64x2();
37091 let src: f64x2 = src.as_f64x2();
        let r: f64x2 = vgetmantsd(a, b, SIGN << 2 | NORM, src, k, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
37094 }
37095}
37096
37097/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
37098/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
37099/// _MM_MANT_NORM_1_2 // interval [1, 2)\
37100/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
37101/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
37102/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
37103/// The sign is determined by sc which can take the following values:\
37104/// _MM_MANT_SIGN_src // sign = sign(src)\
37105/// _MM_MANT_SIGN_zero // sign = 0\
37106/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
37107/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37108///
37109/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getmant_sd&expand=2897)
37110#[inline]
37111#[target_feature(enable = "avx512f")]
37112#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37113#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0))]
37114#[rustc_legacy_const_generics(3, 4)]
37115pub fn _mm_maskz_getmant_sd<
37116 const NORM: _MM_MANTISSA_NORM_ENUM,
37117 const SIGN: _MM_MANTISSA_SIGN_ENUM,
37118>(
37119 k: __mmask8,
37120 a: __m128d,
37121 b: __m128d,
37122) -> __m128d {
37123 unsafe {
37124 static_assert_uimm_bits!(NORM, 4);
37125 static_assert_uimm_bits!(SIGN, 2);
37126 let a: f64x2 = a.as_f64x2();
37127 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vgetmantsd(
            a,
            b,
            SIGN << 2 | NORM,
            f64x2::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        );
        transmute(r)
37137 }
37138}
37139
37140/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
37141/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
37142/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
37143/// * [`_MM_FROUND_TO_NEG_INF`] : round down
37144/// * [`_MM_FROUND_TO_POS_INF`] : round up
37145/// * [`_MM_FROUND_TO_ZERO`] : truncate
37146/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37147///
37148/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_roundscale_ss&expand=4802)
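///
/// As a rough usage sketch (not part of the upstream docs): bits 7:4 of `IMM8` select how many
/// binary fraction bits to keep. It assumes a nightly toolchain with `stdarch_x86_avx512` and an
/// AVX-512F CPU:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm_set1_ps(0.0);
/// let b = _mm_set_ss(2.7);
/// // keep 1 fraction bit (IMM8[7:4] = 1) and truncate (IMM8[2:0] = _MM_FROUND_TO_ZERO)
/// let r = _mm_roundscale_ss::<{ (1 << 4) | _MM_FROUND_TO_ZERO }>(a, b);
/// // lane 0 of `r` is 2.5 (= trunc(2.7 * 2) / 2)
/// ```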
37149#[inline]
37150#[target_feature(enable = "avx512f")]
37151#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37152#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 255))]
37153#[rustc_legacy_const_generics(2)]
37154pub fn _mm_roundscale_ss<const IMM8: i32>(a: __m128, b: __m128) -> __m128 {
37155 unsafe {
37156 static_assert_uimm_bits!(IMM8, 8);
37157 let a: f32x4 = a.as_f32x4();
37158 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vrndscaless(
            a,
            b,
            f32x4::ZERO,
            0b11111111,
            IMM8,
            _MM_FROUND_CUR_DIRECTION,
        );
        transmute(r)
37168 }
37169}
37170
37171/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
37172/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
37173/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
37174/// * [`_MM_FROUND_TO_NEG_INF`] : round down
37175/// * [`_MM_FROUND_TO_POS_INF`] : round up
37176/// * [`_MM_FROUND_TO_ZERO`] : truncate
37177/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37178///
37179/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_roundscale_ss&expand=4800)
37180#[inline]
37181#[target_feature(enable = "avx512f")]
37182#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37183#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0))]
37184#[rustc_legacy_const_generics(4)]
37185pub fn _mm_mask_roundscale_ss<const IMM8: i32>(
37186 src: __m128,
37187 k: __mmask8,
37188 a: __m128,
37189 b: __m128,
37190) -> __m128 {
37191 unsafe {
37192 static_assert_uimm_bits!(IMM8, 8);
37193 let a: f32x4 = a.as_f32x4();
37194 let b: f32x4 = b.as_f32x4();
37195 let src: f32x4 = src.as_f32x4();
        let r: f32x4 = vrndscaless(a, b, src, k, IMM8, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
37198 }
37199}
37200
37201/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
37202/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
37203/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
37204/// * [`_MM_FROUND_TO_NEG_INF`] : round down
37205/// * [`_MM_FROUND_TO_POS_INF`] : round up
37206/// * [`_MM_FROUND_TO_ZERO`] : truncate
37207/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37208///
37209/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_roundscale_ss&expand=4801)
37210#[inline]
37211#[target_feature(enable = "avx512f")]
37212#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37213#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0))]
37214#[rustc_legacy_const_generics(3)]
37215pub fn _mm_maskz_roundscale_ss<const IMM8: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
37216 unsafe {
37217 static_assert_uimm_bits!(IMM8, 8);
37218 let a: f32x4 = a.as_f32x4();
37219 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vrndscaless(a, b, f32x4::ZERO, k, IMM8, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
37222 }
37223}
37224
37225/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
37226/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
37227/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
37228/// * [`_MM_FROUND_TO_NEG_INF`] : round down
37229/// * [`_MM_FROUND_TO_POS_INF`] : round up
37230/// * [`_MM_FROUND_TO_ZERO`] : truncate
37231/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37232///
37233/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_roundscale_sd&expand=4799)
37234#[inline]
37235#[target_feature(enable = "avx512f")]
37236#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37237#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 255))]
37238#[rustc_legacy_const_generics(2)]
37239pub fn _mm_roundscale_sd<const IMM8: i32>(a: __m128d, b: __m128d) -> __m128d {
37240 unsafe {
37241 static_assert_uimm_bits!(IMM8, 8);
37242 let a: f64x2 = a.as_f64x2();
37243 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vrndscalesd(
            a,
            b,
            f64x2::ZERO,
            0b11111111,
            IMM8,
            _MM_FROUND_CUR_DIRECTION,
        );
        transmute(r)
37253 }
37254}
37255
37256/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
37257/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
37258/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
37259/// * [`_MM_FROUND_TO_NEG_INF`] : round down
37260/// * [`_MM_FROUND_TO_POS_INF`] : round up
37261/// * [`_MM_FROUND_TO_ZERO`] : truncate
37262/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37263///
37264/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_roundscale_sd&expand=4797)
37265#[inline]
37266#[target_feature(enable = "avx512f")]
37267#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37268#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0))]
37269#[rustc_legacy_const_generics(4)]
37270pub fn _mm_mask_roundscale_sd<const IMM8: i32>(
37271 src: __m128d,
37272 k: __mmask8,
37273 a: __m128d,
37274 b: __m128d,
37275) -> __m128d {
37276 unsafe {
37277 static_assert_uimm_bits!(IMM8, 8);
37278 let a: f64x2 = a.as_f64x2();
37279 let b: f64x2 = b.as_f64x2();
37280 let src: f64x2 = src.as_f64x2();
        let r: f64x2 = vrndscalesd(a, b, src, k, IMM8, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
37283 }
37284}
37285
37286/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
37287/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
37288/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
37289/// * [`_MM_FROUND_TO_NEG_INF`] : round down
37290/// * [`_MM_FROUND_TO_POS_INF`] : round up
37291/// * [`_MM_FROUND_TO_ZERO`] : truncate
37292/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37293///
37294/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_roundscale_sd&expand=4798)
37295#[inline]
37296#[target_feature(enable = "avx512f")]
37297#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37298#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0))]
37299#[rustc_legacy_const_generics(3)]
37300pub fn _mm_maskz_roundscale_sd<const IMM8: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
37301 unsafe {
37302 static_assert_uimm_bits!(IMM8, 8);
37303 let a: f64x2 = a.as_f64x2();
37304 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vrndscalesd(a, b, f64x2::ZERO, k, IMM8, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
37307 }
37308}
37309
37310/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
37311///
37312/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_scalef_ss&expand=4901)
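///
/// Minimal sketch (not from the upstream docs; assumes a nightly toolchain with
/// `stdarch_x86_avx512` and an AVX-512F CPU) showing that the lower lane becomes
/// `a * 2^floor(b)`:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm_set_ss(3.0);
/// let b = _mm_set_ss(2.0);
/// let r = _mm_scalef_ss(a, b);
/// // lane 0 of `r` is 3.0 * 2^2 = 12.0; the upper lanes are copied from `a`
/// ```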
37313#[inline]
37314#[target_feature(enable = "avx512f")]
37315#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37316#[cfg_attr(test, assert_instr(vscalefss))]
37317pub fn _mm_scalef_ss(a: __m128, b: __m128) -> __m128 {
37318 unsafe {
37319 let a: f32x4 = a.as_f32x4();
37320 let b: f32x4 = b.as_f32x4();
        transmute(vscalefss(
            a,
            b,
            f32x4::ZERO,
            0b11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
37328 }
37329}
37330
37331/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37332///
37333/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_scalef_ss&expand=4899)
37334#[inline]
37335#[target_feature(enable = "avx512f")]
37336#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37337#[cfg_attr(test, assert_instr(vscalefss))]
37338pub fn _mm_mask_scalef_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
37339 unsafe {
37340 let a: f32x4 = a.as_f32x4();
37341 let b: f32x4 = b.as_f32x4();
37342 let src: f32x4 = src.as_f32x4();
        transmute(vscalefss(a, b, src, k, _MM_FROUND_CUR_DIRECTION))
37344 }
37345}
37346
37347/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37348///
37349/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_scalef_ss&expand=4900)
37350#[inline]
37351#[target_feature(enable = "avx512f")]
37352#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37353#[cfg_attr(test, assert_instr(vscalefss))]
37354pub fn _mm_maskz_scalef_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
37355 unsafe {
        transmute(vscalefss(
            a.as_f32x4(),
            b.as_f32x4(),
            f32x4::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
37363 }
37364}
37365
37366/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
37367///
37368/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_scalef_sd&expand=4898)
37369#[inline]
37370#[target_feature(enable = "avx512f")]
37371#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37372#[cfg_attr(test, assert_instr(vscalefsd))]
37373pub fn _mm_scalef_sd(a: __m128d, b: __m128d) -> __m128d {
37374 unsafe {
        transmute(vscalefsd(
            a.as_f64x2(),
            b.as_f64x2(),
            f64x2::ZERO,
            0b11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
37382 }
37383}
37384
37385/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37386///
37387/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_scalef_sd&expand=4896)
37388#[inline]
37389#[target_feature(enable = "avx512f")]
37390#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37391#[cfg_attr(test, assert_instr(vscalefsd))]
37392pub fn _mm_mask_scalef_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
37393 unsafe {
        transmute(vscalefsd(
            a.as_f64x2(),
            b.as_f64x2(),
            src.as_f64x2(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
37401 }
37402}
37403
37404/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37405///
37406/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_scalef_sd&expand=4897)
37407#[inline]
37408#[target_feature(enable = "avx512f")]
37409#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37410#[cfg_attr(test, assert_instr(vscalefsd))]
37411pub fn _mm_maskz_scalef_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
37412 unsafe {
        transmute(vscalefsd(
            a.as_f64x2(),
            b.as_f64x2(),
            f64x2::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
37420 }
37421}
37422
37423/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37424///
37425/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmadd_ss&expand=2582)
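///
/// Rough usage sketch (not part of the upstream docs; assumes a nightly toolchain with
/// `stdarch_x86_avx512` and an AVX-512F CPU) showing how mask bit 0 selects between the fused
/// result and the passthrough value from `a`:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm_set_ss(2.0);
/// let b = _mm_set_ss(3.0);
/// let c = _mm_set_ss(4.0);
/// let taken = _mm_mask_fmadd_ss(a, 0b1, b, c); // lane 0 = 2.0 * 3.0 + 4.0 = 10.0
/// let kept = _mm_mask_fmadd_ss(a, 0b0, b, c); // lane 0 = 2.0 (copied from `a`)
/// ```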
37426#[inline]
37427#[target_feature(enable = "avx512f")]
37428#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37429#[cfg_attr(test, assert_instr(vfmadd))]
37430pub fn _mm_mask_fmadd_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
37431 unsafe {
37432 let mut fmadd: f32 = simd_extract!(a, 0);
37433 if (k & 0b00000001) != 0 {
37434 let extractb: f32 = simd_extract!(b, 0);
37435 let extractc: f32 = simd_extract!(c, 0);
            fmadd = fmaf32(fmadd, extractb, extractc);
37437 }
37438 simd_insert!(a, 0, fmadd)
37439 }
37440}
37441
37442/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37443///
37444/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmadd_ss&expand=2584)
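///
/// Zeromask sketch (not from the upstream docs; assumes a nightly toolchain with
/// `stdarch_x86_avx512` and an AVX-512F CPU): when mask bit 0 is clear the lower lane is forced
/// to zero instead of being copied from `a`:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm_set_ss(2.0);
/// let b = _mm_set_ss(3.0);
/// let c = _mm_set_ss(4.0);
/// let r = _mm_maskz_fmadd_ss(0b0, a, b, c); // lane 0 = 0.0
/// ```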
37445#[inline]
37446#[target_feature(enable = "avx512f")]
37447#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37448#[cfg_attr(test, assert_instr(vfmadd))]
37449pub fn _mm_maskz_fmadd_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
37450 unsafe {
37451 let mut fmadd: f32 = 0.;
37452 if (k & 0b00000001) != 0 {
37453 let extracta: f32 = simd_extract!(a, 0);
37454 let extractb: f32 = simd_extract!(b, 0);
37455 let extractc: f32 = simd_extract!(c, 0);
            fmadd = fmaf32(extracta, extractb, extractc);
37457 }
37458 simd_insert!(a, 0, fmadd)
37459 }
37460}
37461
37462/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
37463///
37464/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmadd_ss&expand=2583)
37465#[inline]
37466#[target_feature(enable = "avx512f")]
37467#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37468#[cfg_attr(test, assert_instr(vfmadd))]
37469pub fn _mm_mask3_fmadd_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
37470 unsafe {
37471 let mut fmadd: f32 = simd_extract!(c, 0);
37472 if (k & 0b00000001) != 0 {
37473 let extracta: f32 = simd_extract!(a, 0);
37474 let extractb: f32 = simd_extract!(b, 0);
            fmadd = fmaf32(extracta, extractb, fmadd);
37476 }
37477 simd_insert!(c, 0, fmadd)
37478 }
37479}
37480
37481/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37482///
37483/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmadd_sd&expand=2578)
37484#[inline]
37485#[target_feature(enable = "avx512f")]
37486#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37487#[cfg_attr(test, assert_instr(vfmadd))]
37488pub fn _mm_mask_fmadd_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
37489 unsafe {
37490 let mut fmadd: f64 = simd_extract!(a, 0);
37491 if (k & 0b00000001) != 0 {
37492 let extractb: f64 = simd_extract!(b, 0);
37493 let extractc: f64 = simd_extract!(c, 0);
            fmadd = fmaf64(fmadd, extractb, extractc);
37495 }
37496 simd_insert!(a, 0, fmadd)
37497 }
37498}
37499
37500/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37501///
37502/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmadd_sd&expand=2580)
37503#[inline]
37504#[target_feature(enable = "avx512f")]
37505#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37506#[cfg_attr(test, assert_instr(vfmadd))]
37507pub fn _mm_maskz_fmadd_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
37508 unsafe {
37509 let mut fmadd: f64 = 0.;
37510 if (k & 0b00000001) != 0 {
37511 let extracta: f64 = simd_extract!(a, 0);
37512 let extractb: f64 = simd_extract!(b, 0);
37513 let extractc: f64 = simd_extract!(c, 0);
            fmadd = fmaf64(extracta, extractb, extractc);
37515 }
37516 simd_insert!(a, 0, fmadd)
37517 }
37518}
37519
37520/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
37521///
37522/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmadd_sd&expand=2579)
37523#[inline]
37524#[target_feature(enable = "avx512f")]
37525#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37526#[cfg_attr(test, assert_instr(vfmadd))]
37527pub fn _mm_mask3_fmadd_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
37528 unsafe {
37529 let mut fmadd: f64 = simd_extract!(c, 0);
37530 if (k & 0b00000001) != 0 {
37531 let extracta: f64 = simd_extract!(a, 0);
37532 let extractb: f64 = simd_extract!(b, 0);
            fmadd = fmaf64(extracta, extractb, fmadd);
37534 }
37535 simd_insert!(c, 0, fmadd)
37536 }
37537}
37538
/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37540///
37541/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmsub_ss&expand=2668)
37542#[inline]
37543#[target_feature(enable = "avx512f")]
37544#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37545#[cfg_attr(test, assert_instr(vfmsub))]
37546pub fn _mm_mask_fmsub_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
37547 unsafe {
37548 let mut fmsub: f32 = simd_extract!(a, 0);
37549 if (k & 0b00000001) != 0 {
37550 let extractb: f32 = simd_extract!(b, 0);
37551 let extractc: f32 = simd_extract!(c, 0);
37552 let extractc: f32 = -extractc;
            fmsub = fmaf32(fmsub, extractb, extractc);
37554 }
37555 simd_insert!(a, 0, fmsub)
37556 }
37557}
37558
37559/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37560///
37561/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmsub_ss&expand=2670)
37562#[inline]
37563#[target_feature(enable = "avx512f")]
37564#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37565#[cfg_attr(test, assert_instr(vfmsub))]
37566pub fn _mm_maskz_fmsub_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
37567 unsafe {
37568 let mut fmsub: f32 = 0.;
37569 if (k & 0b00000001) != 0 {
37570 let extracta: f32 = simd_extract!(a, 0);
37571 let extractb: f32 = simd_extract!(b, 0);
37572 let extractc: f32 = simd_extract!(c, 0);
37573 let extractc: f32 = -extractc;
            fmsub = fmaf32(extracta, extractb, extractc);
37575 }
37576 simd_insert!(a, 0, fmsub)
37577 }
37578}
37579
37580/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
37581///
37582/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmsub_ss&expand=2669)
37583#[inline]
37584#[target_feature(enable = "avx512f")]
37585#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37586#[cfg_attr(test, assert_instr(vfmsub))]
37587pub fn _mm_mask3_fmsub_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
37588 unsafe {
37589 let mut fmsub: f32 = simd_extract!(c, 0);
37590 if (k & 0b00000001) != 0 {
37591 let extracta: f32 = simd_extract!(a, 0);
37592 let extractb: f32 = simd_extract!(b, 0);
37593 let extractc: f32 = -fmsub;
            fmsub = fmaf32(extracta, extractb, extractc);
37595 }
37596 simd_insert!(c, 0, fmsub)
37597 }
37598}
37599
37600/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37601///
37602/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmsub_sd&expand=2664)
37603#[inline]
37604#[target_feature(enable = "avx512f")]
37605#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37606#[cfg_attr(test, assert_instr(vfmsub))]
37607pub fn _mm_mask_fmsub_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
37608 unsafe {
37609 let mut fmsub: f64 = simd_extract!(a, 0);
37610 if (k & 0b00000001) != 0 {
37611 let extractb: f64 = simd_extract!(b, 0);
37612 let extractc: f64 = simd_extract!(c, 0);
37613 let extractc: f64 = -extractc;
            fmsub = fmaf64(fmsub, extractb, extractc);
37615 }
37616 simd_insert!(a, 0, fmsub)
37617 }
37618}
37619
37620/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37621///
37622/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmsub_sd&expand=2666)
37623#[inline]
37624#[target_feature(enable = "avx512f")]
37625#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37626#[cfg_attr(test, assert_instr(vfmsub))]
37627pub fn _mm_maskz_fmsub_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
37628 unsafe {
37629 let mut fmsub: f64 = 0.;
37630 if (k & 0b00000001) != 0 {
37631 let extracta: f64 = simd_extract!(a, 0);
37632 let extractb: f64 = simd_extract!(b, 0);
37633 let extractc: f64 = simd_extract!(c, 0);
37634 let extractc: f64 = -extractc;
            fmsub = fmaf64(extracta, extractb, extractc);
37636 }
37637 simd_insert!(a, 0, fmsub)
37638 }
37639}
37640
37641/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
37642///
37643/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmsub_sd&expand=2665)
37644#[inline]
37645#[target_feature(enable = "avx512f")]
37646#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37647#[cfg_attr(test, assert_instr(vfmsub))]
37648pub fn _mm_mask3_fmsub_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
37649 unsafe {
37650 let mut fmsub: f64 = simd_extract!(c, 0);
37651 if (k & 0b00000001) != 0 {
37652 let extracta: f64 = simd_extract!(a, 0);
37653 let extractb: f64 = simd_extract!(b, 0);
37654 let extractc: f64 = -fmsub;
            fmsub = fmaf64(extracta, extractb, extractc);
37656 }
37657 simd_insert!(c, 0, fmsub)
37658 }
37659}
37660
37661/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37662///
37663/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmadd_ss&expand=2748)
37664#[inline]
37665#[target_feature(enable = "avx512f")]
37666#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37667#[cfg_attr(test, assert_instr(vfnmadd))]
37668pub fn _mm_mask_fnmadd_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
37669 unsafe {
37670 let mut fnmadd: f32 = simd_extract!(a, 0);
37671 if (k & 0b00000001) != 0 {
37672 let extracta: f32 = -fnmadd;
37673 let extractb: f32 = simd_extract!(b, 0);
37674 let extractc: f32 = simd_extract!(c, 0);
            fnmadd = fmaf32(extracta, extractb, extractc);
37676 }
37677 simd_insert!(a, 0, fnmadd)
37678 }
37679}
37680
37681/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37682///
37683/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmadd_ss&expand=2750)
37684#[inline]
37685#[target_feature(enable = "avx512f")]
37686#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37687#[cfg_attr(test, assert_instr(vfnmadd))]
37688pub fn _mm_maskz_fnmadd_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
37689 unsafe {
37690 let mut fnmadd: f32 = 0.;
37691 if (k & 0b00000001) != 0 {
37692 let extracta: f32 = simd_extract!(a, 0);
37693 let extracta: f32 = -extracta;
37694 let extractb: f32 = simd_extract!(b, 0);
37695 let extractc: f32 = simd_extract!(c, 0);
            fnmadd = fmaf32(extracta, extractb, extractc);
37697 }
37698 simd_insert!(a, 0, fnmadd)
37699 }
37700}
37701
37702/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
37703///
37704/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmadd_ss&expand=2749)
37705#[inline]
37706#[target_feature(enable = "avx512f")]
37707#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37708#[cfg_attr(test, assert_instr(vfnmadd))]
37709pub fn _mm_mask3_fnmadd_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
37710 unsafe {
37711 let mut fnmadd: f32 = simd_extract!(c, 0);
37712 if (k & 0b00000001) != 0 {
37713 let extracta: f32 = simd_extract!(a, 0);
37714 let extracta: f32 = -extracta;
37715 let extractb: f32 = simd_extract!(b, 0);
            fnmadd = fmaf32(extracta, extractb, fnmadd);
37717 }
37718 simd_insert!(c, 0, fnmadd)
37719 }
37720}
37721
37722/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37723///
37724/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmadd_sd&expand=2744)
37725#[inline]
37726#[target_feature(enable = "avx512f")]
37727#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37728#[cfg_attr(test, assert_instr(vfnmadd))]
37729pub fn _mm_mask_fnmadd_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
37730 unsafe {
37731 let mut fnmadd: f64 = simd_extract!(a, 0);
37732 if (k & 0b00000001) != 0 {
37733 let extracta: f64 = -fnmadd;
37734 let extractb: f64 = simd_extract!(b, 0);
37735 let extractc: f64 = simd_extract!(c, 0);
            fnmadd = fmaf64(extracta, extractb, extractc);
37737 }
37738 simd_insert!(a, 0, fnmadd)
37739 }
37740}
37741
37742/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37743///
37744/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmadd_sd&expand=2746)
37745#[inline]
37746#[target_feature(enable = "avx512f")]
37747#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37748#[cfg_attr(test, assert_instr(vfnmadd))]
37749pub fn _mm_maskz_fnmadd_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
37750 unsafe {
37751 let mut fnmadd: f64 = 0.;
37752 if (k & 0b00000001) != 0 {
37753 let extracta: f64 = simd_extract!(a, 0);
37754 let extracta: f64 = -extracta;
37755 let extractb: f64 = simd_extract!(b, 0);
37756 let extractc: f64 = simd_extract!(c, 0);
            fnmadd = fmaf64(extracta, extractb, extractc);
37758 }
37759 simd_insert!(a, 0, fnmadd)
37760 }
37761}
37762
37763/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
37764///
37765/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmadd_sd&expand=2745)
37766#[inline]
37767#[target_feature(enable = "avx512f")]
37768#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37769#[cfg_attr(test, assert_instr(vfnmadd))]
37770pub fn _mm_mask3_fnmadd_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
37771 unsafe {
37772 let mut fnmadd: f64 = simd_extract!(c, 0);
37773 if (k & 0b00000001) != 0 {
37774 let extracta: f64 = simd_extract!(a, 0);
37775 let extracta: f64 = -extracta;
37776 let extractb: f64 = simd_extract!(b, 0);
            fnmadd = fmaf64(extracta, extractb, fnmadd);
37778 }
37779 simd_insert!(c, 0, fnmadd)
37780 }
37781}
37782
/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37784///
37785/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmsub_ss&expand=2796)
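///
/// Sign-convention sketch (not from the upstream docs; assumes a nightly toolchain with
/// `stdarch_x86_avx512` and an AVX-512F CPU): with mask bit 0 set the lower lane becomes
/// `-(a * b) - c`:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm_set_ss(2.0);
/// let b = _mm_set_ss(3.0);
/// let c = _mm_set_ss(4.0);
/// let r = _mm_mask_fnmsub_ss(a, 0b1, b, c); // lane 0 = -(2.0 * 3.0) - 4.0 = -10.0
/// ```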
37786#[inline]
37787#[target_feature(enable = "avx512f")]
37788#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37789#[cfg_attr(test, assert_instr(vfnmsub))]
37790pub fn _mm_mask_fnmsub_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
37791 unsafe {
37792 let mut fnmsub: f32 = simd_extract!(a, 0);
37793 if (k & 0b00000001) != 0 {
37794 let extracta: f32 = -fnmsub;
37795 let extractb: f32 = simd_extract!(b, 0);
37796 let extractc: f32 = simd_extract!(c, 0);
37797 let extractc: f32 = -extractc;
            fnmsub = fmaf32(extracta, extractb, extractc);
37799 }
37800 simd_insert!(a, 0, fnmsub)
37801 }
37802}
37803
37804/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37805///
37806/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmsub_ss&expand=2798)
37807#[inline]
37808#[target_feature(enable = "avx512f")]
37809#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37810#[cfg_attr(test, assert_instr(vfnmsub))]
37811pub fn _mm_maskz_fnmsub_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
37812 unsafe {
37813 let mut fnmsub: f32 = 0.;
37814 if (k & 0b00000001) != 0 {
37815 let extracta: f32 = simd_extract!(a, 0);
37816 let extracta: f32 = -extracta;
37817 let extractb: f32 = simd_extract!(b, 0);
37818 let extractc: f32 = simd_extract!(c, 0);
37819 let extractc: f32 = -extractc;
            fnmsub = fmaf32(extracta, extractb, extractc);
37821 }
37822 simd_insert!(a, 0, fnmsub)
37823 }
37824}
37825
37826/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
37827///
37828/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmsub_ss&expand=2797)
37829#[inline]
37830#[target_feature(enable = "avx512f")]
37831#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37832#[cfg_attr(test, assert_instr(vfnmsub))]
37833pub fn _mm_mask3_fnmsub_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
37834 unsafe {
37835 let mut fnmsub: f32 = simd_extract!(c, 0);
37836 if (k & 0b00000001) != 0 {
37837 let extracta: f32 = simd_extract!(a, 0);
37838 let extracta: f32 = -extracta;
37839 let extractb: f32 = simd_extract!(b, 0);
37840 let extractc: f32 = -fnmsub;
            fnmsub = fmaf32(extracta, extractb, extractc);
37842 }
37843 simd_insert!(c, 0, fnmsub)
37844 }
37845}
37846
/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37848///
37849/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmsub_sd&expand=2792)
37850#[inline]
37851#[target_feature(enable = "avx512f")]
37852#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37853#[cfg_attr(test, assert_instr(vfnmsub))]
37854pub fn _mm_mask_fnmsub_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
37855 unsafe {
37856 let mut fnmsub: f64 = simd_extract!(a, 0);
37857 if (k & 0b00000001) != 0 {
37858 let extracta: f64 = -fnmsub;
37859 let extractb: f64 = simd_extract!(b, 0);
37860 let extractc: f64 = simd_extract!(c, 0);
37861 let extractc: f64 = -extractc;
            fnmsub = fmaf64(extracta, extractb, extractc);
37863 }
37864 simd_insert!(a, 0, fnmsub)
37865 }
37866}
37867
/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37869///
37870/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmsub_sd&expand=2794)
37871#[inline]
37872#[target_feature(enable = "avx512f")]
37873#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37874#[cfg_attr(test, assert_instr(vfnmsub))]
37875pub fn _mm_maskz_fnmsub_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
37876 unsafe {
37877 let mut fnmsub: f64 = 0.;
37878 if (k & 0b00000001) != 0 {
37879 let extracta: f64 = simd_extract!(a, 0);
37880 let extracta: f64 = -extracta;
37881 let extractb: f64 = simd_extract!(b, 0);
37882 let extractc: f64 = simd_extract!(c, 0);
37883 let extractc: f64 = -extractc;
            fnmsub = fmaf64(extracta, extractb, extractc);
37885 }
37886 simd_insert!(a, 0, fnmsub)
37887 }
37888}
37889
37890/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
37891///
37892/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmsub_sd&expand=2793)
37893#[inline]
37894#[target_feature(enable = "avx512f")]
37895#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37896#[cfg_attr(test, assert_instr(vfnmsub))]
37897pub fn _mm_mask3_fnmsub_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
37898 unsafe {
37899 let mut fnmsub: f64 = simd_extract!(c, 0);
37900 if (k & 0b00000001) != 0 {
37901 let extracta: f64 = simd_extract!(a, 0);
37902 let extracta: f64 = -extracta;
37903 let extractb: f64 = simd_extract!(b, 0);
37904 let extractc: f64 = -fnmsub;
            fnmsub = fmaf64(extracta, extractb, extractc);
37906 }
37907 simd_insert!(c, 0, fnmsub)
37908 }
37909}
37910
37911/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
37912///
37913/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
37914/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
37915/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
37916/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
37917/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
37918/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37919///
37920/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_round_ss&expand=151)
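///
/// Sketch of how a rounding constant is passed (not part of the upstream docs; assumes a nightly
/// toolchain with `stdarch_x86_avx512` and an AVX-512F CPU):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm_set_ss(1.5);
/// let b = _mm_set_ss(2.25);
/// // round to nearest with exceptions suppressed
/// let r = _mm_add_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
/// // lane 0 of `r` is 3.75; the upper lanes are copied from `a`
/// ```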
37921#[inline]
37922#[target_feature(enable = "avx512f")]
37923#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37924#[cfg_attr(test, assert_instr(vaddss, ROUNDING = 8))]
37925#[rustc_legacy_const_generics(2)]
37926pub fn _mm_add_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
37927 unsafe {
37928 static_assert_rounding!(ROUNDING);
37929 let a: f32x4 = a.as_f32x4();
37930 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vaddss(a, b, f32x4::ZERO, 0b1, ROUNDING);
        transmute(r)
37933 }
37934}
37935
37936/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
37937///
37938/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
37939/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
37940/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
37941/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
37942/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
37943/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37944///
37945/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_add_round_ss&expand=152)
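///
/// Sketch of the `src` passthrough (not part of the upstream docs; assumes a nightly toolchain
/// with `stdarch_x86_avx512` and an AVX-512F CPU):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let src = _mm_set_ss(-1.0);
/// let a = _mm_set_ss(1.5);
/// let b = _mm_set_ss(2.25);
/// // mask bit 0 is clear, so lane 0 of `r` is taken from `src` (-1.0)
/// let r = _mm_mask_add_round_ss::<{ _MM_FROUND_CUR_DIRECTION }>(src, 0b0, a, b);
/// ```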
37946#[inline]
37947#[target_feature(enable = "avx512f")]
37948#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37949#[cfg_attr(test, assert_instr(vaddss, ROUNDING = 8))]
37950#[rustc_legacy_const_generics(4)]
37951pub fn _mm_mask_add_round_ss<const ROUNDING: i32>(
37952 src: __m128,
37953 k: __mmask8,
37954 a: __m128,
37955 b: __m128,
37956) -> __m128 {
37957 unsafe {
37958 static_assert_rounding!(ROUNDING);
37959 let a: f32x4 = a.as_f32x4();
37960 let b: f32x4 = b.as_f32x4();
37961 let src: f32x4 = src.as_f32x4();
        let r: f32x4 = vaddss(a, b, src, k, ROUNDING);
        transmute(r)
37964 }
37965}
37966
37967/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
37968///
37969/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
37970/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
37971/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
37972/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
37973/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
37974/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37975///
37976/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_round_ss&expand=153)
37977#[inline]
37978#[target_feature(enable = "avx512f")]
37979#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37980#[cfg_attr(test, assert_instr(vaddss, ROUNDING = 8))]
37981#[rustc_legacy_const_generics(3)]
37982pub fn _mm_maskz_add_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
37983 unsafe {
37984 static_assert_rounding!(ROUNDING);
37985 let a: f32x4 = a.as_f32x4();
37986 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vaddss(a, b, f32x4::ZERO, k, ROUNDING);
        transmute(r)
37989 }
37990}
37991
37992/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
37993///
37994/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
37995/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
37996/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
37997/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
37998/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
37999/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38000///
38001/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_round_sd&expand=148)
38002#[inline]
38003#[target_feature(enable = "avx512f")]
38004#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38005#[cfg_attr(test, assert_instr(vaddsd, ROUNDING = 8))]
38006#[rustc_legacy_const_generics(2)]
38007pub fn _mm_add_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
38008 unsafe {
38009 static_assert_rounding!(ROUNDING);
38010 let a: f64x2 = a.as_f64x2();
38011 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vaddsd(a, b, f64x2::ZERO, 0b1, ROUNDING);
        transmute(r)
38014 }
38015}
38016
38017/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38018///
38019/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38020/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38021/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38022/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38023/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38024/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38025///
38026/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_add_round_sd&expand=149)
38027#[inline]
38028#[target_feature(enable = "avx512f")]
38029#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38030#[cfg_attr(test, assert_instr(vaddsd, ROUNDING = 8))]
38031#[rustc_legacy_const_generics(4)]
38032pub fn _mm_mask_add_round_sd<const ROUNDING: i32>(
38033 src: __m128d,
38034 k: __mmask8,
38035 a: __m128d,
38036 b: __m128d,
38037) -> __m128d {
38038 unsafe {
38039 static_assert_rounding!(ROUNDING);
38040 let a: f64x2 = a.as_f64x2();
38041 let b: f64x2 = b.as_f64x2();
38042 let src: f64x2 = src.as_f64x2();
        let r: f64x2 = vaddsd(a, b, src, k, ROUNDING);
        transmute(r)
38045 }
38046}
38047
38048/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38049///
38050/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38051/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38052/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38053/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38054/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38055/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38056///
38057/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_round_sd&expand=150)
38058#[inline]
38059#[target_feature(enable = "avx512f")]
38060#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38061#[cfg_attr(test, assert_instr(vaddsd, ROUNDING = 8))]
38062#[rustc_legacy_const_generics(3)]
38063pub fn _mm_maskz_add_round_sd<const ROUNDING: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38064 unsafe {
38065 static_assert_rounding!(ROUNDING);
38066 let a: f64x2 = a.as_f64x2();
38067 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vaddsd(a, b, f64x2::ZERO, k, ROUNDING);
        transmute(r)
38070 }
38071}
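
// Sketch of the write-mask vs. zero-mask behaviour (illustrative, not from the
// original source): only bit 0 of `k` matters for these scalar intrinsics.
//
//     let src = _mm_set_sd(-1.0);
//     let a = _mm_set_sd(10.0);
//     let b = _mm_set_sd(32.0);
//     // k = 0: the low lane is taken from `src` (writemask) or zeroed (zeromask).
//     let masked = _mm_mask_add_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(src, 0, a, b); // low lane = -1.0
//     let zeroed = _mm_maskz_add_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);     // low lane = 0.0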
38072
38073/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
38074///
38075/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38076/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38077/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38078/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38079/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38080/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38081///
38082/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_sub_round_ss&expand=5745)
38083#[inline]
38084#[target_feature(enable = "avx512f")]
38085#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38086#[cfg_attr(test, assert_instr(vsubss, ROUNDING = 8))]
38087#[rustc_legacy_const_generics(2)]
38088pub fn _mm_sub_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
38089 unsafe {
38090 static_assert_rounding!(ROUNDING);
38091 let a: f32x4 = a.as_f32x4();
38092 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vsubss(a, b, f32x4::ZERO, 0b1, ROUNDING);
        transmute(r)
38095 }
38096}
38097
38098/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38099///
38100/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38101/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38102/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38103/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38104/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38105/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38106///
38107/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sub_round_ss&expand=5743)
38108#[inline]
38109#[target_feature(enable = "avx512f")]
38110#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38111#[cfg_attr(test, assert_instr(vsubss, ROUNDING = 8))]
38112#[rustc_legacy_const_generics(4)]
38113pub fn _mm_mask_sub_round_ss<const ROUNDING: i32>(
38114 src: __m128,
38115 k: __mmask8,
38116 a: __m128,
38117 b: __m128,
38118) -> __m128 {
38119 unsafe {
38120 static_assert_rounding!(ROUNDING);
38121 let a: f32x4 = a.as_f32x4();
38122 let b: f32x4 = b.as_f32x4();
38123 let src: f32x4 = src.as_f32x4();
        let r: f32x4 = vsubss(a, b, src, k, ROUNDING);
        transmute(r)
38126 }
38127}
38128
38129/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38130///
38131/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38132/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38133/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38134/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38135/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38136/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38137///
38138/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sub_round_ss&expand=5744)
38139#[inline]
38140#[target_feature(enable = "avx512f")]
38141#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38142#[cfg_attr(test, assert_instr(vsubss, ROUNDING = 8))]
38143#[rustc_legacy_const_generics(3)]
38144pub fn _mm_maskz_sub_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38145 unsafe {
38146 static_assert_rounding!(ROUNDING);
38147 let a: f32x4 = a.as_f32x4();
38148 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vsubss(a, b, f32x4::ZERO, k, ROUNDING);
        transmute(r)
38151 }
38152}
38153
38154/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
38155///
38156/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38157/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38158/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38159/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38160/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38161/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38162///
38163/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_sub_round_sd&expand=5742)
38164#[inline]
38165#[target_feature(enable = "avx512f")]
38166#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38167#[cfg_attr(test, assert_instr(vsubsd, ROUNDING = 8))]
38168#[rustc_legacy_const_generics(2)]
38169pub fn _mm_sub_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
38170 unsafe {
38171 static_assert_rounding!(ROUNDING);
38172 let a: f64x2 = a.as_f64x2();
38173 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vsubsd(a, b, f64x2::ZERO, 0b1, ROUNDING);
        transmute(r)
38176 }
38177}
38178
38179/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38180///
38181/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38182/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38183/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38184/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38185/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38186/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38187///
38188/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sub_round_sd&expand=5740)
38189#[inline]
38190#[target_feature(enable = "avx512f")]
38191#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38192#[cfg_attr(test, assert_instr(vsubsd, ROUNDING = 8))]
38193#[rustc_legacy_const_generics(4)]
38194pub fn _mm_mask_sub_round_sd<const ROUNDING: i32>(
38195 src: __m128d,
38196 k: __mmask8,
38197 a: __m128d,
38198 b: __m128d,
38199) -> __m128d {
38200 unsafe {
38201 static_assert_rounding!(ROUNDING);
38202 let a: f64x2 = a.as_f64x2();
38203 let b: f64x2 = b.as_f64x2();
38204 let src: f64x2 = src.as_f64x2();
        let r: f64x2 = vsubsd(a, b, src, k, ROUNDING);
        transmute(r)
38207 }
38208}
38209
38210/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38211///
38212/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38213/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38214/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38215/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38216/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38217/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38218///
38219/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sub_round_sd&expand=5741)
38220#[inline]
38221#[target_feature(enable = "avx512f")]
38222#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38223#[cfg_attr(test, assert_instr(vsubsd, ROUNDING = 8))]
38224#[rustc_legacy_const_generics(3)]
38225pub fn _mm_maskz_sub_round_sd<const ROUNDING: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38226 unsafe {
38227 static_assert_rounding!(ROUNDING);
38228 let a: f64x2 = a.as_f64x2();
38229 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vsubsd(a, b, f64x2::ZERO, k, ROUNDING);
        transmute(r)
38232 }
38233}
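
// Worked example (illustrative, not from the original source) showing that the
// rounding mode is observable: 1.0 - 2^-60 is not representable as an f64, so
// the low-lane result depends on ROUNDING.
//
//     let a = _mm_set_sd(1.0);
//     let b = _mm_set_sd(2f64.powi(-60));
//     let near = _mm_sub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
//     let down = _mm_sub_round_sd::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a, b);
//     // Low lane: `near` is 1.0, `down` is the next f64 below 1.0 (1.0 - 2^-53).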
38234
38235/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
38236///
38237/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38238/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38239/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38240/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38241/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38242/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38243///
38244/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mul_round_ss&expand=3946)
38245#[inline]
38246#[target_feature(enable = "avx512f")]
38247#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38248#[cfg_attr(test, assert_instr(vmulss, ROUNDING = 8))]
38249#[rustc_legacy_const_generics(2)]
38250pub fn _mm_mul_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
38251 unsafe {
38252 static_assert_rounding!(ROUNDING);
38253 let a: f32x4 = a.as_f32x4();
38254 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vmulss(a, b, f32x4::ZERO, 0b1, ROUNDING);
        transmute(r)
38257 }
38258}
38259
38260/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38261///
38262/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38263/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38264/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38265/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38266/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38267/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38268///
38269/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_mul_round_ss&expand=3944)
38270#[inline]
38271#[target_feature(enable = "avx512f")]
38272#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38273#[cfg_attr(test, assert_instr(vmulss, ROUNDING = 8))]
38274#[rustc_legacy_const_generics(4)]
38275pub fn _mm_mask_mul_round_ss<const ROUNDING: i32>(
38276 src: __m128,
38277 k: __mmask8,
38278 a: __m128,
38279 b: __m128,
38280) -> __m128 {
38281 unsafe {
38282 static_assert_rounding!(ROUNDING);
38283 let a: f32x4 = a.as_f32x4();
38284 let b: f32x4 = b.as_f32x4();
38285 let src: f32x4 = src.as_f32x4();
        let r: f32x4 = vmulss(a, b, src, k, ROUNDING);
        transmute(r)
38288 }
38289}
38290
38291/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38292///
38293/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38294/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38295/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38296/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38297/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38298/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38299///
38300/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_mul_round_ss&expand=3945)
38301#[inline]
38302#[target_feature(enable = "avx512f")]
38303#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38304#[cfg_attr(test, assert_instr(vmulss, ROUNDING = 8))]
38305#[rustc_legacy_const_generics(3)]
38306pub fn _mm_maskz_mul_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38307 unsafe {
38308 static_assert_rounding!(ROUNDING);
38309 let a: f32x4 = a.as_f32x4();
38310 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vmulss(a, b, f32x4::ZERO, k, ROUNDING);
        transmute(r)
38313 }
38314}
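
// Sketch (not from the original source): with the zero-mask variants only the
// low lane is zeroed; the upper lanes are still copied from `a`. `_mm_set_ps`
// is the standard SSE setter (highest lane listed first).
//
//     let a = _mm_set_ps(4.0, 3.0, 2.0, 1.0); // lanes 0..3 = [1.0, 2.0, 3.0, 4.0]
//     let b = _mm_set_ps(0.0, 0.0, 0.0, 8.0);
//     let r = _mm_maskz_mul_round_ss::<{ _MM_FROUND_CUR_DIRECTION }>(0, a, b);
//     // r = [0.0, 2.0, 3.0, 4.0]: lane 0 zeroed by the mask, lanes 1..3 from `a`.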
38315
38316/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
38317///
38318/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38319/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38320/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38321/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38322/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38323/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38324///
38325/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mul_round_sd&expand=3943)
38326#[inline]
38327#[target_feature(enable = "avx512f")]
38328#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38329#[cfg_attr(test, assert_instr(vmulsd, ROUNDING = 8))]
38330#[rustc_legacy_const_generics(2)]
38331pub fn _mm_mul_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
38332 unsafe {
38333 static_assert_rounding!(ROUNDING);
38334 let a: f64x2 = a.as_f64x2();
38335 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vmulsd(a, b, f64x2::ZERO, 0b1, ROUNDING);
        transmute(r)
38338 }
38339}
38340
38341/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38342///
38343/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38344/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38345/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38346/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38347/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38348/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38349///
38350/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_mul_round_sd&expand=3941)
38351#[inline]
38352#[target_feature(enable = "avx512f")]
38353#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38354#[cfg_attr(test, assert_instr(vmulsd, ROUNDING = 8))]
38355#[rustc_legacy_const_generics(4)]
38356pub fn _mm_mask_mul_round_sd<const ROUNDING: i32>(
38357 src: __m128d,
38358 k: __mmask8,
38359 a: __m128d,
38360 b: __m128d,
38361) -> __m128d {
38362 unsafe {
38363 static_assert_rounding!(ROUNDING);
38364 let a: f64x2 = a.as_f64x2();
38365 let b: f64x2 = b.as_f64x2();
38366 let src: f64x2 = src.as_f64x2();
        let r: f64x2 = vmulsd(a, b, src, k, ROUNDING);
        transmute(r)
38369 }
38370}
38371
38372/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38373///
38374/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38375/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38376/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38377/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38378/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38379/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38380///
38381/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_mul_round_sd&expand=3942)
38382#[inline]
38383#[target_feature(enable = "avx512f")]
38384#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38385#[cfg_attr(test, assert_instr(vmulsd, ROUNDING = 8))]
38386#[rustc_legacy_const_generics(3)]
38387pub fn _mm_maskz_mul_round_sd<const ROUNDING: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38388 unsafe {
38389 static_assert_rounding!(ROUNDING);
38390 let a: f64x2 = a.as_f64x2();
38391 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vmulsd(a, b, f64x2::ZERO, k, ROUNDING);
        transmute(r)
38394 }
38395}
38396
38397/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
38398///
38399/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38400/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38401/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38402/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38403/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38404/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38405///
38406/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_div_round_ss&expand=2174)
38407#[inline]
38408#[target_feature(enable = "avx512f")]
38409#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38410#[cfg_attr(test, assert_instr(vdivss, ROUNDING = 8))]
38411#[rustc_legacy_const_generics(2)]
38412pub fn _mm_div_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
38413 unsafe {
38414 static_assert_rounding!(ROUNDING);
38415 let a: f32x4 = a.as_f32x4();
38416 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vdivss(a, b, f32x4::ZERO, 0b1, ROUNDING);
        transmute(r)
38419 }
38420}
38421
38422/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38423///
38424/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38425/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38426/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38427/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38428/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38429/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38430///
38431/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_div_round_ss&expand=2175)
38432#[inline]
38433#[target_feature(enable = "avx512f")]
38434#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38435#[cfg_attr(test, assert_instr(vdivss, ROUNDING = 8))]
38436#[rustc_legacy_const_generics(4)]
38437pub fn _mm_mask_div_round_ss<const ROUNDING: i32>(
38438 src: __m128,
38439 k: __mmask8,
38440 a: __m128,
38441 b: __m128,
38442) -> __m128 {
38443 unsafe {
38444 static_assert_rounding!(ROUNDING);
38445 let a: f32x4 = a.as_f32x4();
38446 let b: f32x4 = b.as_f32x4();
38447 let src: f32x4 = src.as_f32x4();
        let r: f32x4 = vdivss(a, b, src, k, ROUNDING);
        transmute(r)
38450 }
38451}
38452
38453/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38454///
38455/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38456/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38457/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38458/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38459/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38460/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38461///
38462/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_div_round_ss&expand=2176)
38463#[inline]
38464#[target_feature(enable = "avx512f")]
38465#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38466#[cfg_attr(test, assert_instr(vdivss, ROUNDING = 8))]
38467#[rustc_legacy_const_generics(3)]
38468pub fn _mm_maskz_div_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38469 unsafe {
38470 static_assert_rounding!(ROUNDING);
38471 let a: f32x4 = a.as_f32x4();
38472 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vdivss(a, b, f32x4::ZERO, k, ROUNDING);
        transmute(r)
38475 }
38476}
38477
38478/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
38479///
38480/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38481/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38482/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38483/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38484/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38485/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38486///
38487/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_div_round_sd&expand=2171)
38488#[inline]
38489#[target_feature(enable = "avx512f")]
38490#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38491#[cfg_attr(test, assert_instr(vdivsd, ROUNDING = 8))]
38492#[rustc_legacy_const_generics(2)]
38493pub fn _mm_div_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
38494 unsafe {
38495 static_assert_rounding!(ROUNDING);
38496 let a: f64x2 = a.as_f64x2();
38497 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vdivsd(a, b, f64x2::ZERO, 0b1, ROUNDING);
        transmute(r)
38500 }
38501}
38502
38503/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38504///
38505/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38506/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38507/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38508/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38509/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38510/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38511///
38512/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_div_round_sd&expand=2172)
38513#[inline]
38514#[target_feature(enable = "avx512f")]
38515#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38516#[cfg_attr(test, assert_instr(vdivsd, ROUNDING = 8))]
38517#[rustc_legacy_const_generics(4)]
38518pub fn _mm_mask_div_round_sd<const ROUNDING: i32>(
38519 src: __m128d,
38520 k: __mmask8,
38521 a: __m128d,
38522 b: __m128d,
38523) -> __m128d {
38524 unsafe {
38525 static_assert_rounding!(ROUNDING);
38526 let a: f64x2 = a.as_f64x2();
38527 let b: f64x2 = b.as_f64x2();
38528 let src: f64x2 = src.as_f64x2();
        let r: f64x2 = vdivsd(a, b, src, k, ROUNDING);
        transmute(r)
38531 }
38532}
38533
38534/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38535///
38536/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38537/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38538/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38539/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38540/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38541/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38542///
38543/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_div_round_sd&expand=2173)
38544#[inline]
38545#[target_feature(enable = "avx512f")]
38546#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38547#[cfg_attr(test, assert_instr(vdivsd, ROUNDING = 8))]
38548#[rustc_legacy_const_generics(3)]
38549pub fn _mm_maskz_div_round_sd<const ROUNDING: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38550 unsafe {
38551 static_assert_rounding!(ROUNDING);
38552 let a: f64x2 = a.as_f64x2();
38553 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vdivsd(a, b, f64x2::ZERO, k, ROUNDING);
        transmute(r)
38556 }
38557}
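
// Sketch (not from the original source): directed rounding applied to a
// quotient with no exact binary representation.
//
//     let a = _mm_set_sd(1.0);
//     let b = _mm_set_sd(3.0);
//     let lo = _mm_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
//     let hi = _mm_div_round_sd::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a, b);
//     // The low lanes of `lo` and `hi` are adjacent f64 values bracketing 1/3.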
38558
38559/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
38560/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38561///
38562/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_max_round_ss&expand=3668)
38563#[inline]
38564#[target_feature(enable = "avx512f")]
38565#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38566#[cfg_attr(test, assert_instr(vmaxss, SAE = 8))]
38567#[rustc_legacy_const_generics(2)]
38568pub fn _mm_max_round_ss<const SAE: i32>(a: __m128, b: __m128) -> __m128 {
38569 unsafe {
38570 static_assert_sae!(SAE);
38571 let a: f32x4 = a.as_f32x4();
38572 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vmaxss(a, b, f32x4::ZERO, 0b1, SAE);
        transmute(r)
38575 }
38576}
38577
38578/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38579/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38580///
38581/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_round_ss&expand=3672)
38582#[inline]
38583#[target_feature(enable = "avx512f")]
38584#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38585#[cfg_attr(test, assert_instr(vmaxss, SAE = 8))]
38586#[rustc_legacy_const_generics(4)]
38587pub fn _mm_mask_max_round_ss<const SAE: i32>(
38588 src: __m128,
38589 k: __mmask8,
38590 a: __m128,
38591 b: __m128,
38592) -> __m128 {
38593 unsafe {
38594 static_assert_sae!(SAE);
38595 let a: f32x4 = a.as_f32x4();
38596 let b: f32x4 = b.as_f32x4();
38597 let src: f32x4 = src.as_f32x4();
        let r: f32x4 = vmaxss(a, b, src, k, SAE);
        transmute(r)
38600 }
38601}
38602
38603/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38604/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38605///
38606/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_max_round_ss&expand=3667)
38607#[inline]
38608#[target_feature(enable = "avx512f")]
38609#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38610#[cfg_attr(test, assert_instr(vmaxss, SAE = 8))]
38611#[rustc_legacy_const_generics(3)]
38612pub fn _mm_maskz_max_round_ss<const SAE: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38613 unsafe {
38614 static_assert_sae!(SAE);
38615 let a: f32x4 = a.as_f32x4();
38616 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vmaxss(a, b, f32x4::ZERO, k, SAE);
        transmute(r)
38619 }
38620}
38621
38622/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst, and copy the upper element from a to the upper element of dst.\
38623/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38624///
38625/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_max_round_sd&expand=3665)
38626#[inline]
38627#[target_feature(enable = "avx512f")]
38628#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38629#[cfg_attr(test, assert_instr(vmaxsd, SAE = 8))]
38630#[rustc_legacy_const_generics(2)]
38631pub fn _mm_max_round_sd<const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
38632 unsafe {
38633 static_assert_sae!(SAE);
38634 let a: f64x2 = a.as_f64x2();
38635 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vmaxsd(a, b, f64x2::ZERO, 0b1, SAE);
        transmute(r)
38638 }
38639}
38640
38641/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38642/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38643///
38644/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_max_round_sd&expand=3663)
38645#[inline]
38646#[target_feature(enable = "avx512f")]
38647#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38648#[cfg_attr(test, assert_instr(vmaxsd, SAE = 8))]
38649#[rustc_legacy_const_generics(4)]
38650pub fn _mm_mask_max_round_sd<const SAE: i32>(
38651 src: __m128d,
38652 k: __mmask8,
38653 a: __m128d,
38654 b: __m128d,
38655) -> __m128d {
38656 unsafe {
38657 static_assert_sae!(SAE);
38658 let a: f64x2 = a.as_f64x2();
38659 let b: f64x2 = b.as_f64x2();
38660 let src: f64x2 = src.as_f64x2();
        let r: f64x2 = vmaxsd(a, b, src, k, SAE);
        transmute(r)
38663 }
38664}
38665
38666/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38667/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38668///
38669/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_round_sd&expand=3670)
38670#[inline]
38671#[target_feature(enable = "avx512f")]
38672#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38673#[cfg_attr(test, assert_instr(vmaxsd, SAE = 8))]
38674#[rustc_legacy_const_generics(3)]
38675pub fn _mm_maskz_max_round_sd<const SAE: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38676 unsafe {
38677 static_assert_sae!(SAE);
38678 let a: f64x2 = a.as_f64x2();
38679 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vmaxsd(a, b, f64x2::ZERO, k, SAE);
        transmute(r)
38682 }
38683}
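
// Sketch (not from the original source): the max/min "round" variants take an
// SAE (suppress-all-exceptions) constant rather than a full rounding mode, so
// in practice only `_MM_FROUND_CUR_DIRECTION` or `_MM_FROUND_NO_EXC` pass the
// static assertion.
//
//     let a = _mm_set_sd(-0.5);
//     let b = _mm_set_sd(2.0);
//     let r = _mm_max_round_sd::<_MM_FROUND_NO_EXC>(a, b);
//     // Low lane of `r` is 2.0; the upper lane is copied from `a`.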
38684
38685/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
38686/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38687///
38688/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_min_round_ss&expand=3782)
38689#[inline]
38690#[target_feature(enable = "avx512f")]
38691#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38692#[cfg_attr(test, assert_instr(vminss, SAE = 8))]
38693#[rustc_legacy_const_generics(2)]
38694pub fn _mm_min_round_ss<const SAE: i32>(a: __m128, b: __m128) -> __m128 {
38695 unsafe {
38696 static_assert_sae!(SAE);
38697 let a: f32x4 = a.as_f32x4();
38698 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vminss(a, b, f32x4::ZERO, 0b1, SAE);
        transmute(r)
38701 }
38702}
38703
38704/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38705/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38706///
38707/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_min_round_ss&expand=3780)
38708#[inline]
38709#[target_feature(enable = "avx512f")]
38710#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38711#[cfg_attr(test, assert_instr(vminss, SAE = 8))]
38712#[rustc_legacy_const_generics(4)]
38713pub fn _mm_mask_min_round_ss<const SAE: i32>(
38714 src: __m128,
38715 k: __mmask8,
38716 a: __m128,
38717 b: __m128,
38718) -> __m128 {
38719 unsafe {
38720 static_assert_sae!(SAE);
38721 let a: f32x4 = a.as_f32x4();
38722 let b: f32x4 = b.as_f32x4();
38723 let src: f32x4 = src.as_f32x4();
        let r: f32x4 = vminss(a, b, src, k, SAE);
        transmute(r)
38726 }
38727}
38728
38729/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38730/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38731///
38732/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_min_round_ss&expand=3781)
38733#[inline]
38734#[target_feature(enable = "avx512f")]
38735#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38736#[cfg_attr(test, assert_instr(vminss, SAE = 8))]
38737#[rustc_legacy_const_generics(3)]
38738pub fn _mm_maskz_min_round_ss<const SAE: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38739 unsafe {
38740 static_assert_sae!(SAE);
38741 let a: f32x4 = a.as_f32x4();
38742 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vminss(a, b, f32x4::ZERO, k, SAE);
        transmute(r)
38745 }
38746}
38747
/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst, and copy the upper element from a to the upper element of dst.\
38749/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38750///
38751/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_min_round_sd&expand=3779)
38752#[inline]
38753#[target_feature(enable = "avx512f")]
38754#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38755#[cfg_attr(test, assert_instr(vminsd, SAE = 8))]
38756#[rustc_legacy_const_generics(2)]
38757pub fn _mm_min_round_sd<const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
38758 unsafe {
38759 static_assert_sae!(SAE);
38760 let a: f64x2 = a.as_f64x2();
38761 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vminsd(a, b, f64x2::ZERO, 0b1, SAE);
        transmute(r)
38764 }
38765}
38766
38767/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38768/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38769///
38770/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_min_round_sd&expand=3777)
38771#[inline]
38772#[target_feature(enable = "avx512f")]
38773#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38774#[cfg_attr(test, assert_instr(vminsd, SAE = 8))]
38775#[rustc_legacy_const_generics(4)]
38776pub fn _mm_mask_min_round_sd<const SAE: i32>(
38777 src: __m128d,
38778 k: __mmask8,
38779 a: __m128d,
38780 b: __m128d,
38781) -> __m128d {
38782 unsafe {
38783 static_assert_sae!(SAE);
38784 let a: f64x2 = a.as_f64x2();
38785 let b: f64x2 = b.as_f64x2();
38786 let src: f64x2 = src.as_f64x2();
        let r: f64x2 = vminsd(a, b, src, k, SAE);
        transmute(r)
38789 }
38790}
38791
38792/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38793/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38794///
38795/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_min_round_sd&expand=3778)
38796#[inline]
38797#[target_feature(enable = "avx512f")]
38798#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38799#[cfg_attr(test, assert_instr(vminsd, SAE = 8))]
38800#[rustc_legacy_const_generics(3)]
38801pub fn _mm_maskz_min_round_sd<const SAE: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38802 unsafe {
38803 static_assert_sae!(SAE);
38804 let a: f64x2 = a.as_f64x2();
38805 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vminsd(a, b, f64x2::ZERO, k, SAE);
        transmute(r)
38808 }
38809}
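
// Sketch (not from the original source): with mask bit 0 set, the write-mask
// variant behaves like the unmasked intrinsic and `src` is ignored for the low
// lane; `_mm_set_ss` is the standard SSE setter.
//
//     let src = _mm_set_ss(99.0);
//     let a = _mm_set_ss(7.0);
//     let b = _mm_set_ss(-3.0);
//     let r = _mm_mask_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(src, 1, a, b);
//     // Low lane of `r` is -3.0 because bit 0 of the mask is set.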
38810
38811/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
38812///
38813/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38814/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38815/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38816/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38817/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38818/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38819///
38820/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_sqrt_round_ss&expand=5383)
38821#[inline]
38822#[target_feature(enable = "avx512f")]
38823#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38824#[cfg_attr(test, assert_instr(vsqrtss, ROUNDING = 8))]
38825#[rustc_legacy_const_generics(2)]
38826pub fn _mm_sqrt_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
38827 unsafe {
38828 static_assert_rounding!(ROUNDING);
        vsqrtss(a, b, _mm_setzero_ps(), 0b1, ROUNDING)
38830 }
38831}
38832
38833/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38834///
38835/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38836/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38837/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38838/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38839/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38840/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38841///
38842/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sqrt_round_ss&expand=5381)
38843#[inline]
38844#[target_feature(enable = "avx512f")]
38845#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38846#[cfg_attr(test, assert_instr(vsqrtss, ROUNDING = 8))]
38847#[rustc_legacy_const_generics(4)]
38848pub fn _mm_mask_sqrt_round_ss<const ROUNDING: i32>(
38849 src: __m128,
38850 k: __mmask8,
38851 a: __m128,
38852 b: __m128,
38853) -> __m128 {
38854 unsafe {
38855 static_assert_rounding!(ROUNDING);
        vsqrtss(a, b, src, k, ROUNDING)
38857 }
38858}
38859
38860/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38861///
38862/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38863/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38864/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38865/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38866/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38867/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38868///
38869/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sqrt_round_ss&expand=5382)
38870#[inline]
38871#[target_feature(enable = "avx512f")]
38872#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38873#[cfg_attr(test, assert_instr(vsqrtss, ROUNDING = 8))]
38874#[rustc_legacy_const_generics(3)]
38875pub fn _mm_maskz_sqrt_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38876 unsafe {
38877 static_assert_rounding!(ROUNDING);
        vsqrtss(a, b, _mm_setzero_ps(), k, ROUNDING)
38879 }
38880}
38881
38882/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
38883///
38884/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38885/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38886/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38887/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38888/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38889/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38890///
38891/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_sqrt_round_sd&expand=5380)
38892#[inline]
38893#[target_feature(enable = "avx512f")]
38894#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38895#[cfg_attr(test, assert_instr(vsqrtsd, ROUNDING = 8))]
38896#[rustc_legacy_const_generics(2)]
38897pub fn _mm_sqrt_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
38898 unsafe {
38899 static_assert_rounding!(ROUNDING);
        vsqrtsd(a, b, _mm_setzero_pd(), 0b1, ROUNDING)
38901 }
38902}
38903
38904/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38905///
38906/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38907/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38908/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38909/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38910/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38911/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38912///
38913/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sqrt_round_sd&expand=5378)
38914#[inline]
38915#[target_feature(enable = "avx512f")]
38916#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38917#[cfg_attr(test, assert_instr(vsqrtsd, ROUNDING = 8))]
38918#[rustc_legacy_const_generics(4)]
38919pub fn _mm_mask_sqrt_round_sd<const ROUNDING: i32>(
38920 src: __m128d,
38921 k: __mmask8,
38922 a: __m128d,
38923 b: __m128d,
38924) -> __m128d {
38925 unsafe {
38926 static_assert_rounding!(ROUNDING);
        vsqrtsd(a, b, src, k, ROUNDING)
38928 }
38929}
38930
38931/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38932///
38933/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38934/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38935/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38936/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38937/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38938/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38939///
38940/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sqrt_round_sd&expand=5379)
38941#[inline]
38942#[target_feature(enable = "avx512f")]
38943#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38944#[cfg_attr(test, assert_instr(vsqrtsd, ROUNDING = 8))]
38945#[rustc_legacy_const_generics(3)]
38946pub fn _mm_maskz_sqrt_round_sd<const ROUNDING: i32>(
38947 k: __mmask8,
38948 a: __m128d,
38949 b: __m128d,
38950) -> __m128d {
38951 unsafe {
38952 static_assert_rounding!(ROUNDING);
        vsqrtsd(a, b, _mm_setzero_pd(), k, ROUNDING)
38954 }
38955}
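
// Sketch (not from the original source): note that the square root is taken of
// the low element of `b`, while `a` only supplies the upper lane of the result.
//
//     let a = _mm_set_sd(-1.0);   // upper-lane source only
//     let b = _mm_set_sd(9.0);
//     let r = _mm_sqrt_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
//     // Low lane of `r` is 3.0; the upper lane comes from `a`.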
38956
38957/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
38958/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38959///
38960/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getexp_round_ss&expand=2856)
38961#[inline]
38962#[target_feature(enable = "avx512f")]
38963#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38964#[cfg_attr(test, assert_instr(vgetexpss, SAE = 8))]
38965#[rustc_legacy_const_generics(2)]
38966pub fn _mm_getexp_round_ss<const SAE: i32>(a: __m128, b: __m128) -> __m128 {
38967 unsafe {
38968 static_assert_sae!(SAE);
38969 let a: f32x4 = a.as_f32x4();
38970 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vgetexpss(a, b, f32x4::ZERO, 0b1, SAE);
        transmute(r)
38973 }
38974}
38975
38976/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
38977/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38978///
38979/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getexp_round_ss&expand=2857)
38980#[inline]
38981#[target_feature(enable = "avx512f")]
38982#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38983#[cfg_attr(test, assert_instr(vgetexpss, SAE = 8))]
38984#[rustc_legacy_const_generics(4)]
38985pub fn _mm_mask_getexp_round_ss<const SAE: i32>(
38986 src: __m128,
38987 k: __mmask8,
38988 a: __m128,
38989 b: __m128,
38990) -> __m128 {
38991 unsafe {
38992 static_assert_sae!(SAE);
38993 let a: f32x4 = a.as_f32x4();
38994 let b: f32x4 = b.as_f32x4();
38995 let src: f32x4 = src.as_f32x4();
        let r: f32x4 = vgetexpss(a, b, src, k, SAE);
        transmute(r)
38998 }
38999}
39000
39001/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
39002/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39003///
39004/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getexp_round_ss&expand=2858)
39005#[inline]
39006#[target_feature(enable = "avx512f")]
39007#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39008#[cfg_attr(test, assert_instr(vgetexpss, SAE = 8))]
39009#[rustc_legacy_const_generics(3)]
39010pub fn _mm_maskz_getexp_round_ss<const SAE: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
39011 unsafe {
39012 static_assert_sae!(SAE);
39013 let a: f32x4 = a.as_f32x4();
39014 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vgetexpss(a, b, f32x4::ZERO, k, SAE);
        transmute(r)
39017 }
39018}
39019
39020/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
39021/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39022///
39023/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getexp_round_sd&expand=2853)
39024#[inline]
39025#[target_feature(enable = "avx512f")]
39026#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39027#[cfg_attr(test, assert_instr(vgetexpsd, SAE = 8))]
39028#[rustc_legacy_const_generics(2)]
39029pub fn _mm_getexp_round_sd<const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
39030 unsafe {
39031 static_assert_sae!(SAE);
39032 let a: f64x2 = a.as_f64x2();
39033 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vgetexpsd(a, b, f64x2::ZERO, 0b1, SAE);
        transmute(r)
39036 }
39037}
39038
39039/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
39040/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39041///
39042/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getexp_round_sd&expand=2854)
39043#[inline]
39044#[target_feature(enable = "avx512f")]
39045#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39046#[cfg_attr(test, assert_instr(vgetexpsd, SAE = 8))]
39047#[rustc_legacy_const_generics(4)]
39048pub fn _mm_mask_getexp_round_sd<const SAE: i32>(
39049 src: __m128d,
39050 k: __mmask8,
39051 a: __m128d,
39052 b: __m128d,
39053) -> __m128d {
39054 unsafe {
39055 static_assert_sae!(SAE);
39056 let a: f64x2 = a.as_f64x2();
39057 let b: f64x2 = b.as_f64x2();
39058 let src: f64x2 = src.as_f64x2();
        let r: f64x2 = vgetexpsd(a, b, src, k, SAE);
        transmute(r)
39061 }
39062}
39063
39064/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
39065/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39066///
39067/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getexp_round_sd&expand=2855)
39068#[inline]
39069#[target_feature(enable = "avx512f")]
39070#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39071#[cfg_attr(test, assert_instr(vgetexpsd, SAE = 8))]
39072#[rustc_legacy_const_generics(3)]
39073pub fn _mm_maskz_getexp_round_sd<const SAE: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
39074 unsafe {
39075 static_assert_sae!(SAE);
39076 let a: f64x2 = a.as_f64x2();
39077 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vgetexpsd(a, b, f64x2::ZERO, k, SAE);
        transmute(r)
39080 }
39081}
39082
39083/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
39084/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
39085/// _MM_MANT_NORM_1_2 // interval [1, 2)\
39086/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
39087/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
39088/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
39089/// The sign is determined by sc which can take the following values:\
39090/// _MM_MANT_SIGN_src // sign = sign(src)\
39091/// _MM_MANT_SIGN_zero // sign = 0\
39092/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
39093/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39094///
39095/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getmant_round_ss&expand=2892)
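///
/// A minimal sketch of how the three const parameters are supplied (illustrative only; it uses
/// the interval and sign selector names listed above and assumes an `avx512f` target-feature
/// context):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm_setzero_ps();
/// let b = _mm_set_ss(12.0);
/// // 12.0 = 1.5 * 2^3, so normalizing to the [1, 2) interval gives 1.5 in the lower lane.
/// let r = _mm_getmant_round_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_src, _MM_FROUND_CUR_DIRECTION>(a, b);
/// ```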
39096#[inline]
39097#[target_feature(enable = "avx512f")]
39098#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39099#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0, SAE = 4))]
39100#[rustc_legacy_const_generics(2, 3, 4)]
39101pub fn _mm_getmant_round_ss<
39102 const NORM: _MM_MANTISSA_NORM_ENUM,
39103 const SIGN: _MM_MANTISSA_SIGN_ENUM,
39104 const SAE: i32,
39105>(
39106 a: __m128,
39107 b: __m128,
39108) -> __m128 {
39109 unsafe {
39110 static_assert_uimm_bits!(NORM, 4);
39111 static_assert_uimm_bits!(SIGN, 2);
39112 static_assert_mantissas_sae!(SAE);
39113 let a: f32x4 = a.as_f32x4();
39114 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vgetmantss(a, b, SIGN << 2 | NORM, f32x4::ZERO, 0b1, SAE);
        transmute(r)
39117 }
39118}
39119
39120/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
39121/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
39122/// _MM_MANT_NORM_1_2 // interval [1, 2)\
39123/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
39124/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
39125/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
39126/// The sign is determined by sc which can take the following values:\
39127/// _MM_MANT_SIGN_src // sign = sign(src)\
39128/// _MM_MANT_SIGN_zero // sign = 0\
39129/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
39130/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39131///
39132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getmant_round_ss&expand=2893)
39133#[inline]
39134#[target_feature(enable = "avx512f")]
39135#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39136#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0, SAE = 4))]
39137#[rustc_legacy_const_generics(4, 5, 6)]
39138pub fn _mm_mask_getmant_round_ss<
39139 const NORM: _MM_MANTISSA_NORM_ENUM,
39140 const SIGN: _MM_MANTISSA_SIGN_ENUM,
39141 const SAE: i32,
39142>(
39143 src: __m128,
39144 k: __mmask8,
39145 a: __m128,
39146 b: __m128,
39147) -> __m128 {
39148 unsafe {
39149 static_assert_uimm_bits!(NORM, 4);
39150 static_assert_uimm_bits!(SIGN, 2);
39151 static_assert_mantissas_sae!(SAE);
39152 let a: f32x4 = a.as_f32x4();
39153 let b: f32x4 = b.as_f32x4();
39154 let src: f32x4 = src.as_f32x4();
        let r: f32x4 = vgetmantss(a, b, SIGN << 2 | NORM, src, k, SAE);
        transmute(r)
39157 }
39158}
39159
39160/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
39161/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
39162/// _MM_MANT_NORM_1_2 // interval [1, 2)\
39163/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
39164/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
39165/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
39166/// The sign is determined by sc which can take the following values:\
39167/// _MM_MANT_SIGN_src // sign = sign(src)\
39168/// _MM_MANT_SIGN_zero // sign = 0\
39169/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
39170/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39171///
39172/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getmant_round_ss&expand=2894)
39173#[inline]
39174#[target_feature(enable = "avx512f")]
39175#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39176#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0, SAE = 4))]
39177#[rustc_legacy_const_generics(3, 4, 5)]
39178pub fn _mm_maskz_getmant_round_ss<
39179 const NORM: _MM_MANTISSA_NORM_ENUM,
39180 const SIGN: _MM_MANTISSA_SIGN_ENUM,
39181 const SAE: i32,
39182>(
39183 k: __mmask8,
39184 a: __m128,
39185 b: __m128,
39186) -> __m128 {
39187 unsafe {
39188 static_assert_uimm_bits!(NORM, 4);
39189 static_assert_uimm_bits!(SIGN, 2);
39190 static_assert_mantissas_sae!(SAE);
39191 let a: f32x4 = a.as_f32x4();
39192 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vgetmantss(a, b, SIGN << 2 | NORM, f32x4::ZERO, k, SAE);
        transmute(r)
39195 }
39196}
39197
39198/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
39199/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
39200/// _MM_MANT_NORM_1_2 // interval [1, 2)\
39201/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
39202/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
39203/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
39204/// The sign is determined by sc which can take the following values:\
39205/// _MM_MANT_SIGN_src // sign = sign(src)\
39206/// _MM_MANT_SIGN_zero // sign = 0\
39207/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
39208/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39209///
39210/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getmant_round_sd&expand=2889)
39211#[inline]
39212#[target_feature(enable = "avx512f")]
39213#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39214#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0, SAE = 4))]
39215#[rustc_legacy_const_generics(2, 3, 4)]
39216pub fn _mm_getmant_round_sd<
39217 const NORM: _MM_MANTISSA_NORM_ENUM,
39218 const SIGN: _MM_MANTISSA_SIGN_ENUM,
39219 const SAE: i32,
39220>(
39221 a: __m128d,
39222 b: __m128d,
39223) -> __m128d {
39224 unsafe {
39225 static_assert_uimm_bits!(NORM, 4);
39226 static_assert_uimm_bits!(SIGN, 2);
39227 static_assert_mantissas_sae!(SAE);
39228 let a: f64x2 = a.as_f64x2();
39229 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vgetmantsd(a, b, SIGN << 2 | NORM, f64x2::ZERO, 0b1, SAE);
        transmute(r)
39232 }
39233}
39234
39235/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
39236/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
39237/// _MM_MANT_NORM_1_2 // interval [1, 2)\
39238/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
39239/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
39240/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
39241/// The sign is determined by sc which can take the following values:\
39242/// _MM_MANT_SIGN_src // sign = sign(src)\
39243/// _MM_MANT_SIGN_zero // sign = 0\
39244/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
39245/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39246///
39247/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getmant_round_sd&expand=2890)
39248#[inline]
39249#[target_feature(enable = "avx512f")]
39250#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39251#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0, SAE = 4))]
39252#[rustc_legacy_const_generics(4, 5, 6)]
39253pub fn _mm_mask_getmant_round_sd<
39254 const NORM: _MM_MANTISSA_NORM_ENUM,
39255 const SIGN: _MM_MANTISSA_SIGN_ENUM,
39256 const SAE: i32,
39257>(
39258 src: __m128d,
39259 k: __mmask8,
39260 a: __m128d,
39261 b: __m128d,
39262) -> __m128d {
39263 unsafe {
39264 static_assert_uimm_bits!(NORM, 4);
39265 static_assert_uimm_bits!(SIGN, 2);
39266 static_assert_mantissas_sae!(SAE);
39267 let a: f64x2 = a.as_f64x2();
39268 let b: f64x2 = b.as_f64x2();
39269 let src: f64x2 = src.as_f64x2();
        let r: f64x2 = vgetmantsd(a, b, SIGN << 2 | NORM, src, k, SAE);
        transmute(r)
39272 }
39273}
39274
39275/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
39276/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
39277/// _MM_MANT_NORM_1_2 // interval [1, 2)\
39278/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
39279/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
39280/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
39281/// The sign is determined by sc which can take the following values:\
39282/// _MM_MANT_SIGN_src // sign = sign(src)\
39283/// _MM_MANT_SIGN_zero // sign = 0\
39284/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
39285/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39286///
39287/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getmant_round_sd&expand=2891)
39288#[inline]
39289#[target_feature(enable = "avx512f")]
39290#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39291#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0, SAE = 4))]
39292#[rustc_legacy_const_generics(3, 4, 5)]
39293pub fn _mm_maskz_getmant_round_sd<
39294 const NORM: _MM_MANTISSA_NORM_ENUM,
39295 const SIGN: _MM_MANTISSA_SIGN_ENUM,
39296 const SAE: i32,
39297>(
39298 k: __mmask8,
39299 a: __m128d,
39300 b: __m128d,
39301) -> __m128d {
39302 unsafe {
39303 static_assert_uimm_bits!(NORM, 4);
39304 static_assert_uimm_bits!(SIGN, 2);
39305 static_assert_mantissas_sae!(SAE);
39306 let a: f64x2 = a.as_f64x2();
39307 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vgetmantsd(a, b, SIGN << 2 | NORM, f64x2::ZERO, k, SAE);
        transmute(r)
39310 }
39311}
39312
39313/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
39314/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
39315/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
39316/// * [`_MM_FROUND_TO_NEG_INF`] : round down
39317/// * [`_MM_FROUND_TO_POS_INF`] : round up
39318/// * [`_MM_FROUND_TO_ZERO`] : truncate
39319/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39320///
39321/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_roundscale_round_ss&expand=4796)
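///
/// A minimal sketch of how `IMM8` selects the number of fraction bits kept (illustrative only;
/// assumes an `avx512f` target-feature context):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm_setzero_ps();
/// let b = _mm_set_ss(2.3);
/// // IMM8 = 0: keep no fraction bits, i.e. round the lower lane to an integer (2.0 here).
/// let ints = _mm_roundscale_round_ss::<0, _MM_FROUND_NO_EXC>(a, b);
/// // IMM8 = 0x10 (one fraction bit in imm8[7:4]): the lower lane is rounded to 2.5.
/// let halves = _mm_roundscale_round_ss::<0x10, _MM_FROUND_NO_EXC>(a, b);
/// ```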
39323#[inline]
39324#[target_feature(enable = "avx512f")]
39325#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39326#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0, SAE = 8))]
39327#[rustc_legacy_const_generics(2, 3)]
39328pub fn _mm_roundscale_round_ss<const IMM8: i32, const SAE: i32>(a: __m128, b: __m128) -> __m128 {
39329 unsafe {
39330 static_assert_uimm_bits!(IMM8, 8);
39331 static_assert_mantissas_sae!(SAE);
39332 let a: f32x4 = a.as_f32x4();
39333 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vrndscaless(a, b, f32x4::ZERO, 0b11111111, IMM8, SAE);
        transmute(r)
39336 }
39337}
39338
39339/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39340/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
39341/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
39342/// * [`_MM_FROUND_TO_NEG_INF`] : round down
39343/// * [`_MM_FROUND_TO_POS_INF`] : round up
39344/// * [`_MM_FROUND_TO_ZERO`] : truncate
39345/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39346///
39347/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_roundscale_round_ss&expand=4794)
39349#[inline]
39350#[target_feature(enable = "avx512f")]
39351#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39352#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0, SAE = 8))]
39353#[rustc_legacy_const_generics(4, 5)]
39354pub fn _mm_mask_roundscale_round_ss<const IMM8: i32, const SAE: i32>(
39355 src: __m128,
39356 k: __mmask8,
39357 a: __m128,
39358 b: __m128,
39359) -> __m128 {
39360 unsafe {
39361 static_assert_uimm_bits!(IMM8, 8);
39362 static_assert_mantissas_sae!(SAE);
39363 let a: f32x4 = a.as_f32x4();
39364 let b: f32x4 = b.as_f32x4();
39365 let src: f32x4 = src.as_f32x4();
        let r: f32x4 = vrndscaless(a, b, src, k, IMM8, SAE);
        transmute(r)
39368 }
39369}
39370
39371/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39372/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
39373/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
39374/// * [`_MM_FROUND_TO_NEG_INF`] : round down
39375/// * [`_MM_FROUND_TO_POS_INF`] : round up
39376/// * [`_MM_FROUND_TO_ZERO`] : truncate
39377/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39378///
39379/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_roundscale_round_ss&expand=4795)
39381#[inline]
39382#[target_feature(enable = "avx512f")]
39383#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39384#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0, SAE = 8))]
39385#[rustc_legacy_const_generics(3, 4)]
39386pub fn _mm_maskz_roundscale_round_ss<const IMM8: i32, const SAE: i32>(
39387 k: __mmask8,
39388 a: __m128,
39389 b: __m128,
39390) -> __m128 {
39391 unsafe {
39392 static_assert_uimm_bits!(IMM8, 8);
39393 static_assert_mantissas_sae!(SAE);
39394 let a: f32x4 = a.as_f32x4();
39395 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vrndscaless(a, b, f32x4::ZERO, k, IMM8, SAE);
        transmute(r)
39398 }
39399}
39400
39401/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
39402/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
39403/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
39404/// * [`_MM_FROUND_TO_NEG_INF`] : round down
39405/// * [`_MM_FROUND_TO_POS_INF`] : round up
39406/// * [`_MM_FROUND_TO_ZERO`] : truncate
39407/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39408///
39409/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_roundscale_round_sd&expand=4793)
39411#[inline]
39412#[target_feature(enable = "avx512f")]
39413#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39414#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0, SAE = 8))]
39415#[rustc_legacy_const_generics(2, 3)]
39416pub fn _mm_roundscale_round_sd<const IMM8: i32, const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
39417 unsafe {
39418 static_assert_uimm_bits!(IMM8, 8);
39419 static_assert_mantissas_sae!(SAE);
39420 let a: f64x2 = a.as_f64x2();
39421 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vrndscalesd(a, b, f64x2::ZERO, 0b11111111, IMM8, SAE);
        transmute(r)
39424 }
39425}
39426
39427/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39428/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
39429/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
39430/// * [`_MM_FROUND_TO_NEG_INF`] : round down
39431/// * [`_MM_FROUND_TO_POS_INF`] : round up
39432/// * [`_MM_FROUND_TO_ZERO`] : truncate
39433/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39434///
39435/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_roundscale_round_sd&expand=4791)
39437#[inline]
39438#[target_feature(enable = "avx512f")]
39439#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39440#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0, SAE = 8))]
39441#[rustc_legacy_const_generics(4, 5)]
39442pub fn _mm_mask_roundscale_round_sd<const IMM8: i32, const SAE: i32>(
39443 src: __m128d,
39444 k: __mmask8,
39445 a: __m128d,
39446 b: __m128d,
39447) -> __m128d {
39448 unsafe {
39449 static_assert_uimm_bits!(IMM8, 8);
39450 static_assert_mantissas_sae!(SAE);
39451 let a: f64x2 = a.as_f64x2();
39452 let b: f64x2 = b.as_f64x2();
39453 let src: f64x2 = src.as_f64x2();
        let r: f64x2 = vrndscalesd(a, b, src, k, IMM8, SAE);
        transmute(r)
39456 }
39457}
39458
39459/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39460/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
39461/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
39462/// * [`_MM_FROUND_TO_NEG_INF`] : round down
39463/// * [`_MM_FROUND_TO_POS_INF`] : round up
39464/// * [`_MM_FROUND_TO_ZERO`] : truncate
39465/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39466///
39467/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_roundscale_round_sd&expand=4792)
39469#[inline]
39470#[target_feature(enable = "avx512f")]
39471#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39472#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0, SAE = 8))]
39473#[rustc_legacy_const_generics(3, 4)]
39474pub fn _mm_maskz_roundscale_round_sd<const IMM8: i32, const SAE: i32>(
39475 k: __mmask8,
39476 a: __m128d,
39477 b: __m128d,
39478) -> __m128d {
39479 unsafe {
39480 static_assert_uimm_bits!(IMM8, 8);
39481 static_assert_mantissas_sae!(SAE);
39482 let a: f64x2 = a.as_f64x2();
39483 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vrndscalesd(a, b, f64x2::ZERO, k, IMM8, SAE);
        transmute(r)
39486 }
39487}
39488
39489/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
39490///
39491/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39492/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39493/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39494/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39495/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39496/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39497///
39498/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_scalef_round_ss&expand=4895)
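///
/// A minimal usage sketch (illustrative only; assumes an `avx512f` target-feature context):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm_set_ss(1.5);
/// let b = _mm_set_ss(3.0);
/// // Lower lane: 1.5 * 2^floor(3.0) = 12.0; the upper lanes are copied from `a`.
/// let r = _mm_scalef_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b);
/// ```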
39499#[inline]
39500#[target_feature(enable = "avx512f")]
39501#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39502#[cfg_attr(test, assert_instr(vscalefss, ROUNDING = 8))]
39503#[rustc_legacy_const_generics(2)]
39504pub fn _mm_scalef_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
39505 unsafe {
39506 static_assert_rounding!(ROUNDING);
39507 let a: f32x4 = a.as_f32x4();
39508 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vscalefss(a, b, f32x4::ZERO, 0b11111111, ROUNDING);
        transmute(r)
39511 }
39512}
39513
39514/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39515///
39516/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39517/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39518/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39519/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39520/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39521/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39522///
39523/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_scalef_round_ss&expand=4893)
39524#[inline]
39525#[target_feature(enable = "avx512f")]
39526#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39527#[cfg_attr(test, assert_instr(vscalefss, ROUNDING = 8))]
39528#[rustc_legacy_const_generics(4)]
39529pub fn _mm_mask_scalef_round_ss<const ROUNDING: i32>(
39530 src: __m128,
39531 k: __mmask8,
39532 a: __m128,
39533 b: __m128,
39534) -> __m128 {
39535 unsafe {
39536 static_assert_rounding!(ROUNDING);
39537 let a: f32x4 = a.as_f32x4();
39538 let b: f32x4 = b.as_f32x4();
39539 let src: f32x4 = src.as_f32x4();
        let r: f32x4 = vscalefss(a, b, src, k, ROUNDING);
        transmute(r)
39542 }
39543}
39544
39545/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39546///
39547/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39548/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39549/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39550/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39551/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39552/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39553///
39554/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_scalef_round_ss&expand=4894)
39555#[inline]
39556#[target_feature(enable = "avx512f")]
39557#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39558#[cfg_attr(test, assert_instr(vscalefss, ROUNDING = 8))]
39559#[rustc_legacy_const_generics(3)]
39560pub fn _mm_maskz_scalef_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
39561 unsafe {
39562 static_assert_rounding!(ROUNDING);
39563 let a: f32x4 = a.as_f32x4();
39564 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vscalefss(a, b, f32x4::ZERO, k, ROUNDING);
        transmute(r)
39567 }
39568}
39569
39570/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
39571///
39572/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39573/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39574/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39575/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39576/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39577/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39578///
39579/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_scalef_round_sd&expand=4892)
39580#[inline]
39581#[target_feature(enable = "avx512f")]
39582#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39583#[cfg_attr(test, assert_instr(vscalefsd, ROUNDING = 8))]
39584#[rustc_legacy_const_generics(2)]
39585pub fn _mm_scalef_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
39586 unsafe {
39587 static_assert_rounding!(ROUNDING);
39588 let a: f64x2 = a.as_f64x2();
39589 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vscalefsd(a, b, f64x2::ZERO, 0b11111111, ROUNDING);
        transmute(r)
39592 }
39593}
39594
39595/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39596///
39597/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39598/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39599/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39600/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39601/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39602/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39603///
39604/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_scalef_round_sd&expand=4890)
39605#[inline]
39606#[target_feature(enable = "avx512f")]
39607#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39608#[cfg_attr(test, assert_instr(vscalefsd, ROUNDING = 8))]
39609#[rustc_legacy_const_generics(4)]
39610pub fn _mm_mask_scalef_round_sd<const ROUNDING: i32>(
39611 src: __m128d,
39612 k: __mmask8,
39613 a: __m128d,
39614 b: __m128d,
39615) -> __m128d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a: f64x2 = a.as_f64x2();
        let b: f64x2 = b.as_f64x2();
        let src: f64x2 = src.as_f64x2();
        let r: f64x2 = vscalefsd(a, b, src, k, ROUNDING);
        transmute(r)
39622 }
39623}
39624
39625/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39626///
39627/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39628/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39629/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39630/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39631/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39632/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39633///
39634/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_scalef_round_sd&expand=4891)
39635#[inline]
39636#[target_feature(enable = "avx512f")]
39637#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39638#[cfg_attr(test, assert_instr(vscalefsd, ROUNDING = 8))]
39639#[rustc_legacy_const_generics(3)]
39640pub fn _mm_maskz_scalef_round_sd<const ROUNDING: i32>(
39641 k: __mmask8,
39642 a: __m128d,
39643 b: __m128d,
39644) -> __m128d {
39645 unsafe {
39646 static_assert_rounding!(ROUNDING);
39647 let a: f64x2 = a.as_f64x2();
39648 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vscalefsd(a, b, f64x2::ZERO, k, ROUNDING);
        transmute(r)
39651 }
39652}
39653
39654/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
39655///
39656/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39657/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39658/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39659/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39660/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39661/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39662///
39663/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fmadd_round_ss&expand=2573)
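///
/// A minimal usage sketch combining a rounding mode with exception suppression (illustrative
/// only; assumes an `avx512f` target-feature context):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm_set_ss(2.0);
/// let b = _mm_set_ss(3.0);
/// let c = _mm_set_ss(1.0);
/// // Lower lane: 2.0 * 3.0 + 1.0 = 7.0.
/// let r = _mm_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
/// ```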
39664#[inline]
39665#[target_feature(enable = "avx512f")]
39666#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39667#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39668#[rustc_legacy_const_generics(3)]
39669pub fn _mm_fmadd_round_ss<const ROUNDING: i32>(a: __m128, b: __m128, c: __m128) -> __m128 {
39670 unsafe {
39671 static_assert_rounding!(ROUNDING);
39672 let extracta: f32 = simd_extract!(a, 0);
39673 let extractb: f32 = simd_extract!(b, 0);
39674 let extractc: f32 = simd_extract!(c, 0);
        let r: f32 = vfmaddssround(extracta, extractb, extractc, ROUNDING);
39676 simd_insert!(a, 0, r)
39677 }
39678}
39679
39680/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39681///
39682/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39683/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39684/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39685/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39686/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39687/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39688///
39689/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmadd_round_ss&expand=2574)
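///
/// A minimal sketch of the writemask behaviour (illustrative only; assumes an `avx512f`
/// target-feature context):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm_set_ss(2.0);
/// let b = _mm_set_ss(3.0);
/// let c = _mm_set_ss(1.0);
/// // Mask bit 0 clear: the lower lane is copied from `a`, so it stays 2.0.
/// let kept = _mm_mask_fmadd_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0b0, b, c);
/// // Mask bit 0 set: the lower lane becomes 2.0 * 3.0 + 1.0 = 7.0.
/// let fused = _mm_mask_fmadd_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0b1, b, c);
/// ```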
39690#[inline]
39691#[target_feature(enable = "avx512f")]
39692#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39693#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39694#[rustc_legacy_const_generics(4)]
39695pub fn _mm_mask_fmadd_round_ss<const ROUNDING: i32>(
39696 a: __m128,
39697 k: __mmask8,
39698 b: __m128,
39699 c: __m128,
39700) -> __m128 {
39701 unsafe {
39702 static_assert_rounding!(ROUNDING);
39703 let mut fmadd: f32 = simd_extract!(a, 0);
39704 if (k & 0b00000001) != 0 {
39705 let extractb: f32 = simd_extract!(b, 0);
39706 let extractc: f32 = simd_extract!(c, 0);
            fmadd = vfmaddssround(fmadd, extractb, extractc, ROUNDING);
39708 }
39709 simd_insert!(a, 0, fmadd)
39710 }
39711}
39712
39713/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39714///
39715/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39716/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39717/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39718/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39719/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39720/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39721///
39722/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmadd_round_ss&expand=2576)
39723#[inline]
39724#[target_feature(enable = "avx512f")]
39725#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39726#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39727#[rustc_legacy_const_generics(4)]
39728pub fn _mm_maskz_fmadd_round_ss<const ROUNDING: i32>(
39729 k: __mmask8,
39730 a: __m128,
39731 b: __m128,
39732 c: __m128,
39733) -> __m128 {
39734 unsafe {
39735 static_assert_rounding!(ROUNDING);
39736 let mut fmadd: f32 = 0.;
39737 if (k & 0b00000001) != 0 {
39738 let extracta: f32 = simd_extract!(a, 0);
39739 let extractb: f32 = simd_extract!(b, 0);
39740 let extractc: f32 = simd_extract!(c, 0);
            fmadd = vfmaddssround(extracta, extractb, extractc, ROUNDING);
39742 }
39743 simd_insert!(a, 0, fmadd)
39744 }
39745}
39746
39747/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
39748///
39749/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39750/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39751/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39752/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39753/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39754/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39755///
39756/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmadd_round_ss&expand=2575)
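///
/// A minimal sketch showing that the result is built from `c` (illustrative only; assumes an
/// `avx512f` target-feature context):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm_set_ss(2.0);
/// let b = _mm_set_ss(3.0);
/// let c = _mm_set_ps(9.0, 8.0, 7.0, 1.0);
/// // Upper lanes come from `c` (7.0, 8.0, 9.0); with mask bit 0 set the lower lane becomes
/// // 2.0 * 3.0 + 1.0 = 7.0, otherwise it would stay 1.0.
/// let r = _mm_mask3_fmadd_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b, c, 0b1);
/// ```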
39757#[inline]
39758#[target_feature(enable = "avx512f")]
39759#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39760#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39761#[rustc_legacy_const_generics(4)]
39762pub fn _mm_mask3_fmadd_round_ss<const ROUNDING: i32>(
39763 a: __m128,
39764 b: __m128,
39765 c: __m128,
39766 k: __mmask8,
39767) -> __m128 {
39768 unsafe {
39769 static_assert_rounding!(ROUNDING);
39770 let mut fmadd: f32 = simd_extract!(c, 0);
39771 if (k & 0b00000001) != 0 {
39772 let extracta: f32 = simd_extract!(a, 0);
39773 let extractb: f32 = simd_extract!(b, 0);
            fmadd = vfmaddssround(extracta, extractb, fmadd, ROUNDING);
39775 }
39776 simd_insert!(c, 0, fmadd)
39777 }
39778}
39779
39780/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
39781///
39782/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39783/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39784/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39785/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39786/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39787/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39788///
39789/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fmadd_round_sd&expand=2569)
39790#[inline]
39791#[target_feature(enable = "avx512f")]
39792#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39793#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39794#[rustc_legacy_const_generics(3)]
39795pub fn _mm_fmadd_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
39796 unsafe {
39797 static_assert_rounding!(ROUNDING);
39798 let extracta: f64 = simd_extract!(a, 0);
39799 let extractb: f64 = simd_extract!(b, 0);
39800 let extractc: f64 = simd_extract!(c, 0);
        let fmadd: f64 = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
39802 simd_insert!(a, 0, fmadd)
39803 }
39804}
39805
39806/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39807///
39808/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39809/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39810/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39811/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39812/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39813/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39814///
39815/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmadd_round_sd&expand=2570)
39816#[inline]
39817#[target_feature(enable = "avx512f")]
39818#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39819#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39820#[rustc_legacy_const_generics(4)]
39821pub fn _mm_mask_fmadd_round_sd<const ROUNDING: i32>(
39822 a: __m128d,
39823 k: __mmask8,
39824 b: __m128d,
39825 c: __m128d,
39826) -> __m128d {
39827 unsafe {
39828 static_assert_rounding!(ROUNDING);
39829 let mut fmadd: f64 = simd_extract!(a, 0);
39830 if (k & 0b00000001) != 0 {
39831 let extractb: f64 = simd_extract!(b, 0);
39832 let extractc: f64 = simd_extract!(c, 0);
            fmadd = vfmaddsdround(fmadd, extractb, extractc, ROUNDING);
39834 }
39835 simd_insert!(a, 0, fmadd)
39836 }
39837}
39838
39839/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39840///
39841/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39842/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39843/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39844/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39845/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39846/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39847///
39848/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmadd_round_sd&expand=2572)
39849#[inline]
39850#[target_feature(enable = "avx512f")]
39851#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39852#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39853#[rustc_legacy_const_generics(4)]
39854pub fn _mm_maskz_fmadd_round_sd<const ROUNDING: i32>(
39855 k: __mmask8,
39856 a: __m128d,
39857 b: __m128d,
39858 c: __m128d,
39859) -> __m128d {
39860 unsafe {
39861 static_assert_rounding!(ROUNDING);
39862 let mut fmadd: f64 = 0.;
39863 if (k & 0b00000001) != 0 {
39864 let extracta: f64 = simd_extract!(a, 0);
39865 let extractb: f64 = simd_extract!(b, 0);
39866 let extractc: f64 = simd_extract!(c, 0);
            fmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
39868 }
39869 simd_insert!(a, 0, fmadd)
39870 }
39871}
39872
39873/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
39874///
39875/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39876/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39877/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39878/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39879/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39880/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39881///
39882/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmadd_round_sd&expand=2571)
39883#[inline]
39884#[target_feature(enable = "avx512f")]
39885#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39886#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39887#[rustc_legacy_const_generics(4)]
39888pub fn _mm_mask3_fmadd_round_sd<const ROUNDING: i32>(
39889 a: __m128d,
39890 b: __m128d,
39891 c: __m128d,
39892 k: __mmask8,
39893) -> __m128d {
39894 unsafe {
39895 static_assert_rounding!(ROUNDING);
39896 let mut fmadd: f64 = simd_extract!(c, 0);
39897 if (k & 0b00000001) != 0 {
39898 let extracta: f64 = simd_extract!(a, 0);
39899 let extractb: f64 = simd_extract!(b, 0);
            fmadd = vfmaddsdround(extracta, extractb, fmadd, ROUNDING);
39901 }
39902 simd_insert!(c, 0, fmadd)
39903 }
39904}
39905
39906/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
39907///
39908/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39909/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39910/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39911/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39912/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39913/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39914///
39915/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fmsub_round_ss&expand=2659)
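///
/// A minimal usage sketch (illustrative only; assumes an `avx512f` target-feature context):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm_set_ss(5.0);
/// let b = _mm_set_ss(4.0);
/// let c = _mm_set_ss(2.5);
/// // Lower lane: 5.0 * 4.0 - 2.5 = 17.5.
/// let r = _mm_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
/// ```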
39916#[inline]
39917#[target_feature(enable = "avx512f")]
39918#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39919#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
39920#[rustc_legacy_const_generics(3)]
39921pub fn _mm_fmsub_round_ss<const ROUNDING: i32>(a: __m128, b: __m128, c: __m128) -> __m128 {
39922 unsafe {
39923 static_assert_rounding!(ROUNDING);
39924 let extracta: f32 = simd_extract!(a, 0);
39925 let extractb: f32 = simd_extract!(b, 0);
39926 let extractc: f32 = simd_extract!(c, 0);
39927 let extractc: f32 = -extractc;
        let fmsub: f32 = vfmaddssround(extracta, extractb, extractc, ROUNDING);
39929 simd_insert!(a, 0, fmsub)
39930 }
39931}
39932
39933/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39934///
39935/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39936/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39937/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39938/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39939/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39940/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39941///
39942/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmsub_round_ss&expand=2660)
39943#[inline]
39944#[target_feature(enable = "avx512f")]
39945#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39946#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
39947#[rustc_legacy_const_generics(4)]
39948pub fn _mm_mask_fmsub_round_ss<const ROUNDING: i32>(
39949 a: __m128,
39950 k: __mmask8,
39951 b: __m128,
39952 c: __m128,
39953) -> __m128 {
39954 unsafe {
39955 static_assert_rounding!(ROUNDING);
39956 let mut fmsub: f32 = simd_extract!(a, 0);
39957 if (k & 0b00000001) != 0 {
39958 let extractb: f32 = simd_extract!(b, 0);
39959 let extractc: f32 = simd_extract!(c, 0);
39960 let extractc: f32 = -extractc;
            fmsub = vfmaddssround(fmsub, extractb, extractc, ROUNDING);
39962 }
39963 simd_insert!(a, 0, fmsub)
39964 }
39965}
39966
39967/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39968///
39969/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39970/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39971/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39972/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39973/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39974/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39975///
39976/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmsub_round_ss&expand=2662)
39977#[inline]
39978#[target_feature(enable = "avx512f")]
39979#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39980#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
39981#[rustc_legacy_const_generics(4)]
39982pub fn _mm_maskz_fmsub_round_ss<const ROUNDING: i32>(
39983 k: __mmask8,
39984 a: __m128,
39985 b: __m128,
39986 c: __m128,
39987) -> __m128 {
39988 unsafe {
39989 static_assert_rounding!(ROUNDING);
39990 let mut fmsub: f32 = 0.;
39991 if (k & 0b00000001) != 0 {
39992 let extracta: f32 = simd_extract!(a, 0);
39993 let extractb: f32 = simd_extract!(b, 0);
39994 let extractc: f32 = simd_extract!(c, 0);
39995 let extractc: f32 = -extractc;
            fmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
39997 }
39998 simd_insert!(a, 0, fmsub)
39999 }
40000}
40001
40002/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
40003///
40004/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40005/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40006/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40007/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40008/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40009/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40010///
40011/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmsub_round_ss&expand=2661)
40012#[inline]
40013#[target_feature(enable = "avx512f")]
40014#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40015#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
40016#[rustc_legacy_const_generics(4)]
40017pub fn _mm_mask3_fmsub_round_ss<const ROUNDING: i32>(
40018 a: __m128,
40019 b: __m128,
40020 c: __m128,
40021 k: __mmask8,
40022) -> __m128 {
40023 unsafe {
40024 static_assert_rounding!(ROUNDING);
40025 let mut fmsub: f32 = simd_extract!(c, 0);
40026 if (k & 0b00000001) != 0 {
40027 let extracta: f32 = simd_extract!(a, 0);
40028 let extractb: f32 = simd_extract!(b, 0);
40029 let extractc: f32 = -fmsub;
            fmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
40031 }
40032 simd_insert!(c, 0, fmsub)
40033 }
40034}
40035
40036/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
40037///
40038/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40039/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40040/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40041/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40042/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40043/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40044///
40045/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fmsub_round_sd&expand=2655)
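///
/// A minimal usage sketch with truncation selected (illustrative only; assumes an `avx512f`
/// target-feature context):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm_set_sd(1.0);
/// let b = _mm_set_sd(3.0);
/// let c = _mm_set_sd(0.5);
/// // Lower lane: 1.0 * 3.0 - 0.5 = 2.5; the upper lane is copied from `a`.
/// let r = _mm_fmsub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
/// ```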
40046#[inline]
40047#[target_feature(enable = "avx512f")]
40048#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40049#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
40050#[rustc_legacy_const_generics(3)]
40051pub fn _mm_fmsub_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
40052 unsafe {
40053 static_assert_rounding!(ROUNDING);
40054 let extracta: f64 = simd_extract!(a, 0);
40055 let extractb: f64 = simd_extract!(b, 0);
40056 let extractc: f64 = simd_extract!(c, 0);
40057 let extractc: f64 = -extractc;
        let fmsub: f64 = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40059 simd_insert!(a, 0, fmsub)
40060 }
40061}
40062
40063/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40064///
40065/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40066/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40067/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40068/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40069/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40070/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40071///
40072/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmsub_round_sd&expand=2656)
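///
/// A minimal sketch of the writemask behaviour (illustrative only, not compiled
/// or run here: the intrinsic is unstable and needs nightly Rust plus AVX-512F
/// support at runtime):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let (a, b, c) = (_mm_set_sd(2.0), _mm_set_sd(3.0), _mm_set_sd(1.0));
///     // Mask bit 0 set: lower lane = (2.0 * 3.0) - 1.0 = 5.0.
///     let r = _mm_mask_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, 0b1, b, c);
///     assert_eq!(_mm_cvtsd_f64(r), 5.0);
///     // Mask bit 0 clear: the lower lane is copied from `a` instead.
///     let r = _mm_mask_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, 0b0, b, c);
///     assert_eq!(_mm_cvtsd_f64(r), 2.0);
/// }
/// ```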
40073#[inline]
40074#[target_feature(enable = "avx512f")]
40075#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40076#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
40077#[rustc_legacy_const_generics(4)]
40078pub fn _mm_mask_fmsub_round_sd<const ROUNDING: i32>(
40079 a: __m128d,
40080 k: __mmask8,
40081 b: __m128d,
40082 c: __m128d,
40083) -> __m128d {
40084 unsafe {
40085 static_assert_rounding!(ROUNDING);
40086 let mut fmsub: f64 = simd_extract!(a, 0);
40087 if (k & 0b00000001) != 0 {
40088 let extractb: f64 = simd_extract!(b, 0);
40089 let extractc: f64 = simd_extract!(c, 0);
40090 let extractc: f64 = -extractc;
40091 fmsub = vfmaddsdround(fmsub, extractb, extractc, ROUNDING);
40092 }
40093 simd_insert!(a, 0, fmsub)
40094 }
40095}
40096
40097/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40098///
40099/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40100/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40101/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40102/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40103/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40104/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40105///
40106/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmsub_round_sd&expand=2658)
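///
/// A minimal sketch of the zeromask behaviour (illustrative only, not compiled
/// or run here: the intrinsic is unstable and needs nightly Rust plus AVX-512F
/// support at runtime):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let (a, b, c) = (_mm_set_sd(2.0), _mm_set_sd(3.0), _mm_set_sd(1.0));
///     // Mask bit 0 clear: the lower lane is zeroed out.
///     let r = _mm_maskz_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b, c);
///     assert_eq!(_mm_cvtsd_f64(r), 0.0);
/// }
/// ```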
40107#[inline]
40108#[target_feature(enable = "avx512f")]
40109#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40110#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
40111#[rustc_legacy_const_generics(4)]
40112pub fn _mm_maskz_fmsub_round_sd<const ROUNDING: i32>(
40113 k: __mmask8,
40114 a: __m128d,
40115 b: __m128d,
40116 c: __m128d,
40117) -> __m128d {
40118 unsafe {
40119 static_assert_rounding!(ROUNDING);
40120 let mut fmsub: f64 = 0.;
40121 if (k & 0b00000001) != 0 {
40122 let extracta: f64 = simd_extract!(a, 0);
40123 let extractb: f64 = simd_extract!(b, 0);
40124 let extractc: f64 = simd_extract!(c, 0);
40125 let extractc: f64 = -extractc;
40126 fmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40127 }
40128 simd_insert!(a, 0, fmsub)
40129 }
40130}
40131
40132/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
40133///
40134/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40135/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40136/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40137/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40138/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40139/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40140///
40141/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmsub_round_sd&expand=2657)
40142#[inline]
40143#[target_feature(enable = "avx512f")]
40144#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40145#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
40146#[rustc_legacy_const_generics(4)]
40147pub fn _mm_mask3_fmsub_round_sd<const ROUNDING: i32>(
40148 a: __m128d,
40149 b: __m128d,
40150 c: __m128d,
40151 k: __mmask8,
40152) -> __m128d {
40153 unsafe {
40154 static_assert_rounding!(ROUNDING);
40155 let mut fmsub: f64 = simd_extract!(c, 0);
40156 if (k & 0b00000001) != 0 {
40157 let extracta: f64 = simd_extract!(a, 0);
40158 let extractb: f64 = simd_extract!(b, 0);
40159 let extractc: f64 = -fmsub;
40160 fmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40161 }
40162 simd_insert!(c, 0, fmsub)
40163 }
40164}
40165
40166/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
40167///
40168/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40169/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40170/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40171/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40172/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40173/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40174///
40175/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fnmadd_round_ss&expand=2739)
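///
/// A minimal usage sketch (illustrative only, not compiled or run here: the
/// intrinsic is unstable and needs nightly Rust plus AVX-512F support at runtime):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm_set_ss(2.0);
///     let b = _mm_set_ss(3.0);
///     let c = _mm_set_ss(10.0);
///     // Lower lane: -(2.0 * 3.0) + 10.0 = 4.0; the upper 3 lanes are copied from `a`.
///     let r = _mm_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
///     assert_eq!(_mm_cvtss_f32(r), 4.0);
/// }
/// ```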
40176#[inline]
40177#[target_feature(enable = "avx512f")]
40178#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40179#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40180#[rustc_legacy_const_generics(3)]
40181pub fn _mm_fnmadd_round_ss<const ROUNDING: i32>(a: __m128, b: __m128, c: __m128) -> __m128 {
40182 unsafe {
40183 static_assert_rounding!(ROUNDING);
40184 let extracta: f32 = simd_extract!(a, 0);
40185 let extracta: f32 = -extracta;
40186 let extractb: f32 = simd_extract!(b, 0);
40187 let extractc: f32 = simd_extract!(c, 0);
40188 let fnmadd: f32 = vfmaddssround(extracta, extractb, extractc, ROUNDING);
40189 simd_insert!(a, 0, fnmadd)
40190 }
40191}
40192
40193/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40194///
40195/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40196/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40197/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40198/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40199/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40200/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40201///
40202/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmadd_round_ss&expand=2740)
40203#[inline]
40204#[target_feature(enable = "avx512f")]
40205#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40206#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40207#[rustc_legacy_const_generics(4)]
40208pub fn _mm_mask_fnmadd_round_ss<const ROUNDING: i32>(
40209 a: __m128,
40210 k: __mmask8,
40211 b: __m128,
40212 c: __m128,
40213) -> __m128 {
40214 unsafe {
40215 static_assert_rounding!(ROUNDING);
40216 let mut fnmadd: f32 = simd_extract!(a, 0);
40217 if (k & 0b00000001) != 0 {
40218 let extracta: f32 = -fnmadd;
40219 let extractb: f32 = simd_extract!(b, 0);
40220 let extractc: f32 = simd_extract!(c, 0);
40221 fnmadd = vfmaddssround(extracta, extractb, extractc, ROUNDING);
40222 }
40223 simd_insert!(a, 0, fnmadd)
40224 }
40225}
40226
40227/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40228///
40229/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40230/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40231/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40232/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40233/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40234/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40235///
40236/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmadd_round_ss&expand=2742)
40237#[inline]
40238#[target_feature(enable = "avx512f")]
40239#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40240#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40241#[rustc_legacy_const_generics(4)]
40242pub fn _mm_maskz_fnmadd_round_ss<const ROUNDING: i32>(
40243 k: __mmask8,
40244 a: __m128,
40245 b: __m128,
40246 c: __m128,
40247) -> __m128 {
40248 unsafe {
40249 static_assert_rounding!(ROUNDING);
40250 let mut fnmadd: f32 = 0.;
40251 if (k & 0b00000001) != 0 {
40252 let extracta: f32 = simd_extract!(a, 0);
40253 let extracta: f32 = -extracta;
40254 let extractb: f32 = simd_extract!(b, 0);
40255 let extractc: f32 = simd_extract!(c, 0);
40256 fnmadd = vfmaddssround(extracta, extractb, extractc, ROUNDING);
40257 }
40258 simd_insert!(a, 0, fnmadd)
40259 }
40260}
40261
40262/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
40263///
40264/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40265/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40266/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40267/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40268/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40269/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40270///
40271/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmadd_round_ss&expand=2741)
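///
/// A minimal sketch of the mask3 behaviour, where `c` provides both the fallback
/// lower lane and the upper lanes (illustrative only, not compiled or run here:
/// the intrinsic is unstable and needs nightly Rust plus AVX-512F support at runtime):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let (a, b, c) = (_mm_set_ss(2.0), _mm_set_ss(3.0), _mm_set_ss(10.0));
///     // Mask bit 0 set: lower lane = -(2.0 * 3.0) + 10.0 = 4.0.
///     let r = _mm_mask3_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c, 0b1);
///     assert_eq!(_mm_cvtss_f32(r), 4.0);
///     // Mask bit 0 clear: the lower lane is copied from `c`.
///     let r = _mm_mask3_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c, 0b0);
///     assert_eq!(_mm_cvtss_f32(r), 10.0);
/// }
/// ```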
40272#[inline]
40273#[target_feature(enable = "avx512f")]
40274#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40275#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40276#[rustc_legacy_const_generics(4)]
40277pub fn _mm_mask3_fnmadd_round_ss<const ROUNDING: i32>(
40278 a: __m128,
40279 b: __m128,
40280 c: __m128,
40281 k: __mmask8,
40282) -> __m128 {
40283 unsafe {
40284 static_assert_rounding!(ROUNDING);
40285 let mut fnmadd: f32 = simd_extract!(c, 0);
40286 if (k & 0b00000001) != 0 {
40287 let extracta: f32 = simd_extract!(a, 0);
40288 let extracta: f32 = -extracta;
40289 let extractb: f32 = simd_extract!(b, 0);
40290 fnmadd = vfmaddssround(extracta, extractb, fnmadd, ROUNDING);
40291 }
40292 simd_insert!(c, 0, fnmadd)
40293 }
40294}
40295
40296/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
40297///
40298/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40299/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40300/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40301/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40302/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40303/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40304///
40305/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fnmadd_round_sd&expand=2735)
40306#[inline]
40307#[target_feature(enable = "avx512f")]
40308#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40309#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40310#[rustc_legacy_const_generics(3)]
40311pub fn _mm_fnmadd_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
40312 unsafe {
40313 static_assert_rounding!(ROUNDING);
40314 let extracta: f64 = simd_extract!(a, 0);
40315 let extracta: f64 = -extracta;
40316 let extractb: f64 = simd_extract!(b, 0);
40317 let extractc: f64 = simd_extract!(c, 0);
40318 let fnmadd: f64 = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40319 simd_insert!(a, 0, fnmadd)
40320 }
40321}
40322
40323/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40324///
40325/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40326/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40327/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40328/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40329/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40330/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40331///
40332/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmadd_round_sd&expand=2736)
40333#[inline]
40334#[target_feature(enable = "avx512f")]
40335#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40336#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40337#[rustc_legacy_const_generics(4)]
40338pub fn _mm_mask_fnmadd_round_sd<const ROUNDING: i32>(
40339 a: __m128d,
40340 k: __mmask8,
40341 b: __m128d,
40342 c: __m128d,
40343) -> __m128d {
40344 unsafe {
40345 static_assert_rounding!(ROUNDING);
40346 let mut fnmadd: f64 = simd_extract!(a, 0);
40347 if (k & 0b00000001) != 0 {
40348 let extracta: f64 = -fnmadd;
40349 let extractb: f64 = simd_extract!(b, 0);
40350 let extractc: f64 = simd_extract!(c, 0);
40351 fnmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40352 }
40353 simd_insert!(a, 0, fnmadd)
40354 }
40355}
40356
40357/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40358///
40359/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40360/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40361/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40362/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40363/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40364/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40365///
40366/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmadd_round_sd&expand=2738)
40367#[inline]
40368#[target_feature(enable = "avx512f")]
40369#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40370#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40371#[rustc_legacy_const_generics(4)]
40372pub fn _mm_maskz_fnmadd_round_sd<const ROUNDING: i32>(
40373 k: __mmask8,
40374 a: __m128d,
40375 b: __m128d,
40376 c: __m128d,
40377) -> __m128d {
40378 unsafe {
40379 static_assert_rounding!(ROUNDING);
40380 let mut fnmadd: f64 = 0.;
40381 if (k & 0b00000001) != 0 {
40382 let extracta: f64 = simd_extract!(a, 0);
40383 let extracta: f64 = -extracta;
40384 let extractb: f64 = simd_extract!(b, 0);
40385 let extractc: f64 = simd_extract!(c, 0);
40386 fnmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40387 }
40388 simd_insert!(a, 0, fnmadd)
40389 }
40390}
40391
40392/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
40393///
40394/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40395/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40396/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40397/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40398/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40399/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40400///
40401/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmadd_round_sd&expand=2737)
40402#[inline]
40403#[target_feature(enable = "avx512f")]
40404#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40405#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40406#[rustc_legacy_const_generics(4)]
40407pub fn _mm_mask3_fnmadd_round_sd<const ROUNDING: i32>(
40408 a: __m128d,
40409 b: __m128d,
40410 c: __m128d,
40411 k: __mmask8,
40412) -> __m128d {
40413 unsafe {
40414 static_assert_rounding!(ROUNDING);
40415 let mut fnmadd: f64 = simd_extract!(c, 0);
40416 if (k & 0b00000001) != 0 {
40417 let extracta: f64 = simd_extract!(a, 0);
40418 let extracta: f64 = -extracta;
40419 let extractb: f64 = simd_extract!(b, 0);
40420 fnmadd = vfmaddsdround(extracta, extractb, fnmadd, ROUNDING);
40421 }
40422 simd_insert!(c, 0, fnmadd)
40423 }
40424}
40425
40426/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
40427///
40428/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40429/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40430/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40431/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40432/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40433/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40434///
40435/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fnmsub_round_ss&expand=2787)
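///
/// A minimal usage sketch (illustrative only, not compiled or run here: the
/// intrinsic is unstable and needs nightly Rust plus AVX-512F support at runtime):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm_set_ss(2.0);
///     let b = _mm_set_ss(3.0);
///     let c = _mm_set_ss(1.0);
///     // Lower lane: -(2.0 * 3.0) - 1.0 = -7.0; the upper 3 lanes are copied from `a`.
///     let r = _mm_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
///     assert_eq!(_mm_cvtss_f32(r), -7.0);
/// }
/// ```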
40436#[inline]
40437#[target_feature(enable = "avx512f")]
40438#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40439#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40440#[rustc_legacy_const_generics(3)]
40441pub fn _mm_fnmsub_round_ss<const ROUNDING: i32>(a: __m128, b: __m128, c: __m128) -> __m128 {
40442 unsafe {
40443 static_assert_rounding!(ROUNDING);
40444 let extracta: f32 = simd_extract!(a, 0);
40445 let extracta: f32 = -extracta;
40446 let extractb: f32 = simd_extract!(b, 0);
40447 let extractc: f32 = simd_extract!(c, 0);
40448 let extractc: f32 = -extractc;
40449 let fnmsub: f32 = vfmaddssround(extracta, extractb, extractc, ROUNDING);
40450 simd_insert!(a, 0, fnmsub)
40451 }
40452}
40453
40454/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40455///
40456/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40457/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40458/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40459/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40460/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40461/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40462///
40463/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmsub_round_ss&expand=2788)
40464#[inline]
40465#[target_feature(enable = "avx512f")]
40466#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40467#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40468#[rustc_legacy_const_generics(4)]
40469pub fn _mm_mask_fnmsub_round_ss<const ROUNDING: i32>(
40470 a: __m128,
40471 k: __mmask8,
40472 b: __m128,
40473 c: __m128,
40474) -> __m128 {
40475 unsafe {
40476 static_assert_rounding!(ROUNDING);
40477 let mut fnmsub: f32 = simd_extract!(a, 0);
40478 if (k & 0b00000001) != 0 {
40479 let extracta: f32 = -fnmsub;
40480 let extractb: f32 = simd_extract!(b, 0);
40481 let extractc: f32 = simd_extract!(c, 0);
40482 let extractc: f32 = -extractc;
40483 fnmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
40484 }
40485 simd_insert!(a, 0, fnmsub)
40486 }
40487}
40488
40489/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40490///
40491/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40492/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40493/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40494/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40495/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40496/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40497///
40498/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmsub_round_ss&expand=2790)
40499#[inline]
40500#[target_feature(enable = "avx512f")]
40501#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40502#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40503#[rustc_legacy_const_generics(4)]
40504pub fn _mm_maskz_fnmsub_round_ss<const ROUNDING: i32>(
40505 k: __mmask8,
40506 a: __m128,
40507 b: __m128,
40508 c: __m128,
40509) -> __m128 {
40510 unsafe {
40511 static_assert_rounding!(ROUNDING);
40512 let mut fnmsub: f32 = 0.;
40513 if (k & 0b00000001) != 0 {
40514 let extracta: f32 = simd_extract!(a, 0);
40515 let extracta: f32 = -extracta;
40516 let extractb: f32 = simd_extract!(b, 0);
40517 let extractc: f32 = simd_extract!(c, 0);
40518 let extractc: f32 = -extractc;
40519 fnmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
40520 }
40521 simd_insert!(a, 0, fnmsub)
40522 }
40523}
40524
40525/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
40526///
40527/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40528/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40529/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40530/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40531/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40532/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40533///
40534/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmsub_round_ss&expand=2789)
40535#[inline]
40536#[target_feature(enable = "avx512f")]
40537#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40538#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40539#[rustc_legacy_const_generics(4)]
40540pub fn _mm_mask3_fnmsub_round_ss<const ROUNDING: i32>(
40541 a: __m128,
40542 b: __m128,
40543 c: __m128,
40544 k: __mmask8,
40545) -> __m128 {
40546 unsafe {
40547 static_assert_rounding!(ROUNDING);
40548 let mut fnmsub: f32 = simd_extract!(c, 0);
40549 if (k & 0b00000001) != 0 {
40550 let extracta: f32 = simd_extract!(a, 0);
40551 let extracta: f32 = -extracta;
40552 let extractb: f32 = simd_extract!(b, 0);
40553 let extractc: f32 = -fnmsub;
40554 fnmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
40555 }
40556 simd_insert!(c, 0, fnmsub)
40557 }
40558}
40559
40560/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
40561///
40562/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40563/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40564/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40565/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40566/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40567/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40568///
40569/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fnmsub_round_sd&expand=2783)
40570#[inline]
40571#[target_feature(enable = "avx512f")]
40572#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40573#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40574#[rustc_legacy_const_generics(3)]
40575pub fn _mm_fnmsub_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
40576 unsafe {
40577 static_assert_rounding!(ROUNDING);
40578 let extracta: f64 = simd_extract!(a, 0);
40579 let extracta: f64 = -extracta;
40580 let extractb: f64 = simd_extract!(b, 0);
40581 let extractc: f64 = simd_extract!(c, 0);
40582 let extractc: f64 = -extractc;
40583 let fnmsub: f64 = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40584 simd_insert!(a, 0, fnmsub)
40585 }
40586}
40587
40588/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40589///
40590/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40591/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40592/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40593/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40594/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40595/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40596///
40597/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmsub_round_sd&expand=2784)
40598#[inline]
40599#[target_feature(enable = "avx512f")]
40600#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40601#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40602#[rustc_legacy_const_generics(4)]
40603pub fn _mm_mask_fnmsub_round_sd<const ROUNDING: i32>(
40604 a: __m128d,
40605 k: __mmask8,
40606 b: __m128d,
40607 c: __m128d,
40608) -> __m128d {
40609 unsafe {
40610 static_assert_rounding!(ROUNDING);
40611 let mut fnmsub: f64 = simd_extract!(a, 0);
40612 if (k & 0b00000001) != 0 {
40613 let extracta: f64 = -fnmsub;
40614 let extractb: f64 = simd_extract!(b, 0);
40615 let extractc: f64 = simd_extract!(c, 0);
40616 let extractc: f64 = -extractc;
40617 fnmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40618 }
40619 simd_insert!(a, 0, fnmsub)
40620 }
40621}
40622
40623/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40624///
40625/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40626/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40627/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40628/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40629/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40630/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40631///
40632/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmsub_round_sd&expand=2786)
40633#[inline]
40634#[target_feature(enable = "avx512f")]
40635#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40636#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40637#[rustc_legacy_const_generics(4)]
40638pub fn _mm_maskz_fnmsub_round_sd<const ROUNDING: i32>(
40639 k: __mmask8,
40640 a: __m128d,
40641 b: __m128d,
40642 c: __m128d,
40643) -> __m128d {
40644 unsafe {
40645 static_assert_rounding!(ROUNDING);
40646 let mut fnmsub: f64 = 0.;
40647 if (k & 0b00000001) != 0 {
40648 let extracta: f64 = simd_extract!(a, 0);
40649 let extracta: f64 = -extracta;
40650 let extractb: f64 = simd_extract!(b, 0);
40651 let extractc: f64 = simd_extract!(c, 0);
40652 let extractc: f64 = -extractc;
40653 fnmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40654 }
40655 simd_insert!(a, 0, fnmsub)
40656 }
40657}
40658
40659/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
40660///
40661/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40662/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40663/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40664/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40665/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40666/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40667///
40668/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmsub_round_sd&expand=2785)
40669#[inline]
40670#[target_feature(enable = "avx512f")]
40671#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40672#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40673#[rustc_legacy_const_generics(4)]
40674pub fn _mm_mask3_fnmsub_round_sd<const ROUNDING: i32>(
40675 a: __m128d,
40676 b: __m128d,
40677 c: __m128d,
40678 k: __mmask8,
40679) -> __m128d {
40680 unsafe {
40681 static_assert_rounding!(ROUNDING);
40682 let mut fnmsub: f64 = simd_extract!(c, 0);
40683 if (k & 0b00000001) != 0 {
40684 let extracta: f64 = simd_extract!(a, 0);
40685 let extracta: f64 = -extracta;
40686 let extractb: f64 = simd_extract!(b, 0);
40687 let extractc: f64 = -fnmsub;
40688 fnmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40689 }
40690 simd_insert!(c, 0, fnmsub)
40691 }
40692}
40693
40694/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.
40695///
40696/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_ss&expand=2517)
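///
/// A minimal sketch of one common fixup (replacing a NaN input with +0.0). The
/// per-token response table layout in `c` (one nibble per special-value token,
/// QNAN being token 0 and response 8 meaning +0.0) is taken from Intel's SDM
/// description of `VFIXUPIMMSS` and is an assumption of this example, which is
/// not compiled or run here (the intrinsic is unstable and needs nightly Rust
/// plus AVX-512F support at runtime):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm_set_ss(42.0);
///     let b = _mm_set_ss(f32::NAN);
///     // Lower 32-bit integer of `c`: nibble 0 (the QNAN token) = 8, i.e. "+0.0";
///     // every other token keeps the value from `a`.
///     let c = _mm_set_epi32(0, 0, 0, 0x0000_0008);
///     let r = _mm_fixupimm_ss::<0>(a, b, c);
///     assert_eq!(_mm_cvtss_f32(r), 0.0);
/// }
/// ```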
40697#[inline]
40698#[target_feature(enable = "avx512f")]
40699#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40700#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0))]
40701#[rustc_legacy_const_generics(3)]
40702pub fn _mm_fixupimm_ss<const IMM8: i32>(a: __m128, b: __m128, c: __m128i) -> __m128 {
40703 unsafe {
40704 static_assert_uimm_bits!(IMM8, 8);
40705 let a: f32x4 = a.as_f32x4();
40706 let b: f32x4 = b.as_f32x4();
40707 let c: i32x4 = c.as_i32x4();
40708 let r: f32x4 = vfixupimmss(a, b, c, IMM8, 0b11111111, _MM_FROUND_CUR_DIRECTION);
40709 let fixupimm: f32 = simd_extract!(r, 0);
40710 let r: f32x4 = simd_insert!(a, 0, fixupimm);
40711 transmute(r)
40712 }
40713}
40714
40715/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.
40716///
40717/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_ss&expand=2518)
40718#[inline]
40719#[target_feature(enable = "avx512f")]
40720#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40721#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0))]
40722#[rustc_legacy_const_generics(4)]
40723pub fn _mm_mask_fixupimm_ss<const IMM8: i32>(
40724 a: __m128,
40725 k: __mmask8,
40726 b: __m128,
40727 c: __m128i,
40728) -> __m128 {
40729 unsafe {
40730 static_assert_uimm_bits!(IMM8, 8);
40731 let a: f32x4 = a.as_f32x4();
40732 let b: f32x4 = b.as_f32x4();
40733 let c: i32x4 = c.as_i32x4();
40734 let fixupimm: f32x4 = vfixupimmss(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
40735 let fixupimm: f32 = simd_extract!(fixupimm, 0);
40736 let r: f32x4 = simd_insert!(a, 0, fixupimm);
40737 transmute(r)
40738 }
40739}
40740
40741/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.
40742///
40743/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_ss&expand=2519)
40744#[inline]
40745#[target_feature(enable = "avx512f")]
40746#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40747#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0))]
40748#[rustc_legacy_const_generics(4)]
40749pub fn _mm_maskz_fixupimm_ss<const IMM8: i32>(
40750 k: __mmask8,
40751 a: __m128,
40752 b: __m128,
40753 c: __m128i,
40754) -> __m128 {
40755 unsafe {
40756 static_assert_uimm_bits!(IMM8, 8);
40757 let a: f32x4 = a.as_f32x4();
40758 let b: f32x4 = b.as_f32x4();
40759 let c: i32x4 = c.as_i32x4();
40760 let fixupimm: f32x4 = vfixupimmssz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
40761 let fixupimm: f32 = simd_extract!(fixupimm, 0);
40762 let r: f32x4 = simd_insert!(a, 0, fixupimm);
40763 transmute(r)
40764 }
40765}
40766
40767/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.
40768///
40769/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_sd&expand=2514)
40770#[inline]
40771#[target_feature(enable = "avx512f")]
40772#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40773#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0))]
40774#[rustc_legacy_const_generics(3)]
40775pub fn _mm_fixupimm_sd<const IMM8: i32>(a: __m128d, b: __m128d, c: __m128i) -> __m128d {
40776 unsafe {
40777 static_assert_uimm_bits!(IMM8, 8);
40778 let a: f64x2 = a.as_f64x2();
40779 let b: f64x2 = b.as_f64x2();
40780 let c: i64x2 = c.as_i64x2();
40781 let fixupimm: f64x2 = vfixupimmsd(a, b, c, IMM8, 0b11111111, _MM_FROUND_CUR_DIRECTION);
40782 let fixupimm: f64 = simd_extract!(fixupimm, 0);
40783 let r: f64x2 = simd_insert!(a, 0, fixupimm);
40784 transmute(r)
40785 }
40786}
40787
40788/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.
40789///
40790/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_sd&expand=2515)
40791#[inline]
40792#[target_feature(enable = "avx512f")]
40793#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40794#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0))]
40795#[rustc_legacy_const_generics(4)]
40796pub fn _mm_mask_fixupimm_sd<const IMM8: i32>(
40797 a: __m128d,
40798 k: __mmask8,
40799 b: __m128d,
40800 c: __m128i,
40801) -> __m128d {
40802 unsafe {
40803 static_assert_uimm_bits!(IMM8, 8);
40804 let a: f64x2 = a.as_f64x2();
40805 let b: f64x2 = b.as_f64x2();
40806 let c: i64x2 = c.as_i64x2();
40807 let fixupimm: f64x2 = vfixupimmsd(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
40808 let fixupimm: f64 = simd_extract!(fixupimm, 0);
40809 let r: f64x2 = simd_insert!(a, 0, fixupimm);
40810 transmute(r)
40811 }
40812}
40813
40814/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.
40815///
40816/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_sd&expand=2516)
40817#[inline]
40818#[target_feature(enable = "avx512f")]
40819#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40820#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0))]
40821#[rustc_legacy_const_generics(4)]
40822pub fn _mm_maskz_fixupimm_sd<const IMM8: i32>(
40823 k: __mmask8,
40824 a: __m128d,
40825 b: __m128d,
40826 c: __m128i,
40827) -> __m128d {
40828 unsafe {
40829 static_assert_uimm_bits!(IMM8, 8);
40830 let a: f64x2 = a.as_f64x2();
40831 let b: f64x2 = b.as_f64x2();
40832 let c: i64x2 = c.as_i64x2();
40833 let fixupimm: f64x2 = vfixupimmsdz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
40834 let fixupimm: f64 = simd_extract!(fixupimm, 0);
40835 let r: f64x2 = simd_insert!(a, 0, fixupimm);
40836 transmute(r)
40837 }
40838}
40839
40840/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.\
40841/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40842///
40843/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_round_ss&expand=2511)
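///
/// A minimal sketch mirroring the non-`round` variant but suppressing exception
/// reporting via the sae parameter. The response-table encoding in `c` follows
/// Intel's SDM description of `VFIXUPIMMSS` and is an assumption of this example,
/// which is not compiled or run here (the intrinsic is unstable and needs nightly
/// Rust plus AVX-512F support at runtime):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm_set_ss(42.0);
///     let b = _mm_set_ss(f32::NAN);
///     let c = _mm_set_epi32(0, 0, 0, 0x0000_0008); // map the QNAN token to +0.0
///     let r = _mm_fixupimm_round_ss::<0, { _MM_FROUND_NO_EXC }>(a, b, c);
///     assert_eq!(_mm_cvtss_f32(r), 0.0);
/// }
/// ```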
40844#[inline]
40845#[target_feature(enable = "avx512f")]
40846#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40847#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0, SAE = 8))]
40848#[rustc_legacy_const_generics(3, 4)]
40849pub fn _mm_fixupimm_round_ss<const IMM8: i32, const SAE: i32>(
40850 a: __m128,
40851 b: __m128,
40852 c: __m128i,
40853) -> __m128 {
40854 unsafe {
40855 static_assert_uimm_bits!(IMM8, 8);
40856 static_assert_mantissas_sae!(SAE);
40857 let a: f32x4 = a.as_f32x4();
40858 let b: f32x4 = b.as_f32x4();
40859 let c: i32x4 = c.as_i32x4();
40860 let r: f32x4 = vfixupimmss(a, b, c, IMM8, 0b11111111, SAE);
40861 let fixupimm: f32 = simd_extract!(r, 0);
40862 let r: f32x4 = simd_insert!(a, 0, fixupimm);
40863 transmute(r)
40864 }
40865}
40866
40867/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.\
40868/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40869///
40870/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_round_ss&expand=2512)
40871#[inline]
40872#[target_feature(enable = "avx512f")]
40873#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40874#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0, SAE = 8))]
40875#[rustc_legacy_const_generics(4, 5)]
40876pub fn _mm_mask_fixupimm_round_ss<const IMM8: i32, const SAE: i32>(
40877 a: __m128,
40878 k: __mmask8,
40879 b: __m128,
40880 c: __m128i,
40881) -> __m128 {
40882 unsafe {
40883 static_assert_uimm_bits!(IMM8, 8);
40884 static_assert_mantissas_sae!(SAE);
40885 let a: f32x4 = a.as_f32x4();
40886 let b: f32x4 = b.as_f32x4();
40887 let c: i32x4 = c.as_i32x4();
40888 let r: f32x4 = vfixupimmss(a, b, c, IMM8, k, SAE);
40889 let fixupimm: f32 = simd_extract!(r, 0);
40890 let r: f32x4 = simd_insert!(a, 0, fixupimm);
40891 transmute(r)
40892 }
40893}
40894
40895/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.\
40896/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40897///
40898/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_round_ss&expand=2513)
40899#[inline]
40900#[target_feature(enable = "avx512f")]
40901#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40902#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0, SAE = 8))]
40903#[rustc_legacy_const_generics(4, 5)]
40904pub fn _mm_maskz_fixupimm_round_ss<const IMM8: i32, const SAE: i32>(
40905 k: __mmask8,
40906 a: __m128,
40907 b: __m128,
40908 c: __m128i,
40909) -> __m128 {
40910 unsafe {
40911 static_assert_uimm_bits!(IMM8, 8);
40912 static_assert_mantissas_sae!(SAE);
40913 let a: f32x4 = a.as_f32x4();
40914 let b: f32x4 = b.as_f32x4();
40915 let c: i32x4 = c.as_i32x4();
40916 let r: f32x4 = vfixupimmssz(a, b, c, IMM8, k, SAE);
40917 let fixupimm: f32 = simd_extract!(r, 0);
40918 let r: f32x4 = simd_insert!(a, 0, fixupimm);
40919 transmute(r)
40920 }
40921}
40922
40923/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.\
40924/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40925///
40926/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_round_sd&expand=2508)
40927#[inline]
40928#[target_feature(enable = "avx512f")]
40929#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40930#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0, SAE = 8))]
40931#[rustc_legacy_const_generics(3, 4)]
40932pub fn _mm_fixupimm_round_sd<const IMM8: i32, const SAE: i32>(
40933 a: __m128d,
40934 b: __m128d,
40935 c: __m128i,
40936) -> __m128d {
40937 unsafe {
40938 static_assert_uimm_bits!(IMM8, 8);
40939 static_assert_mantissas_sae!(SAE);
40940 let a: f64x2 = a.as_f64x2();
40941 let b: f64x2 = b.as_f64x2();
40942 let c: i64x2 = c.as_i64x2();
40943 let r: f64x2 = vfixupimmsd(a, b, c, IMM8, 0b11111111, SAE);
40944 let fixupimm: f64 = simd_extract!(r, 0);
40945 let r: f64x2 = simd_insert!(a, 0, fixupimm);
40946 transmute(r)
40947 }
40948}
40949
40950/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.\
40951/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40952///
40953/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_round_sd&expand=2509)
40954#[inline]
40955#[target_feature(enable = "avx512f")]
40956#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40957#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0, SAE = 8))]
40958#[rustc_legacy_const_generics(4, 5)]
40959pub fn _mm_mask_fixupimm_round_sd<const IMM8: i32, const SAE: i32>(
40960 a: __m128d,
40961 k: __mmask8,
40962 b: __m128d,
40963 c: __m128i,
40964) -> __m128d {
40965 unsafe {
40966 static_assert_uimm_bits!(IMM8, 8);
40967 static_assert_mantissas_sae!(SAE);
40968 let a: f64x2 = a.as_f64x2();
40969 let b: f64x2 = b.as_f64x2();
40970 let c: i64x2 = c.as_i64x2();
40971 let r: f64x2 = vfixupimmsd(a, b, c, IMM8, k, SAE);
40972 let fixupimm: f64 = simd_extract!(r, 0);
40973 let r: f64x2 = simd_insert!(a, 0, fixupimm);
40974 transmute(r)
40975 }
40976}
40977
40978/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.\
40979/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40980///
40981/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_round_sd&expand=2510)
40982#[inline]
40983#[target_feature(enable = "avx512f")]
40984#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40985#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0, SAE = 8))]
40986#[rustc_legacy_const_generics(4, 5)]
40987pub fn _mm_maskz_fixupimm_round_sd<const IMM8: i32, const SAE: i32>(
40988 k: __mmask8,
40989 a: __m128d,
40990 b: __m128d,
40991 c: __m128i,
40992) -> __m128d {
40993 unsafe {
40994 static_assert_uimm_bits!(IMM8, 8);
40995 static_assert_mantissas_sae!(SAE);
40996 let a: f64x2 = a.as_f64x2();
40997 let b: f64x2 = b.as_f64x2();
40998 let c: i64x2 = c.as_i64x2();
40999 let r: f64x2 = vfixupimmsdz(a, b, c, IMM8, k, SAE);
41000 let fixupimm: f64 = simd_extract!(r, 0);
41001 let r: f64x2 = simd_insert!(a, 0, fixupimm);
41002 transmute(r)
41003 }
41004}
41005
41006/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
41007///
41008/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_cvtss_sd&expand=1896)
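///
/// A minimal sketch of the writemask behaviour (illustrative only, not compiled
/// or run here: the intrinsic is unstable and needs nightly Rust plus AVX-512F
/// support at runtime):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let src = _mm_set_sd(-1.0);
///     let a = _mm_set_pd(7.0, 0.0); // the upper lane (7.0) is always copied to dst
///     let b = _mm_set_ss(2.5);
///     // Mask bit 0 set: lower lane = 2.5_f32 widened to f64.
///     let r = _mm_mask_cvtss_sd(src, 0b1, a, b);
///     assert_eq!(_mm_cvtsd_f64(r), 2.5);
///     // Mask bit 0 clear: the lower lane is taken from `src`.
///     let r = _mm_mask_cvtss_sd(src, 0b0, a, b);
///     assert_eq!(_mm_cvtsd_f64(r), -1.0);
/// }
/// ```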
41009#[inline]
41010#[target_feature(enable = "avx512f")]
41011#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41012#[cfg_attr(test, assert_instr(vcvtss2sd))]
41013pub fn _mm_mask_cvtss_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128) -> __m128d {
41014 unsafe {
41015 transmute(vcvtss2sd(
41016 a.as_f64x2(),
41017 b.as_f32x4(),
41018 src.as_f64x2(),
41019 k,
41020 _MM_FROUND_CUR_DIRECTION,
41021 ))
41022 }
41023}
41024
41025/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
41026///
41027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_cvtss_sd&expand=1897)
41028#[inline]
41029#[target_feature(enable = "avx512f")]
41030#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41031#[cfg_attr(test, assert_instr(vcvtss2sd))]
41032pub fn _mm_maskz_cvtss_sd(k: __mmask8, a: __m128d, b: __m128) -> __m128d {
41033 unsafe {
41034 transmute(vcvtss2sd(
41035 a.as_f64x2(),
41036 b.as_f32x4(),
41037 f64x2::ZERO,
41038 k,
41039 _MM_FROUND_CUR_DIRECTION,
41040 ))
41041 }
41042}
41043
41044/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
41045///
41046/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_cvtsd_ss&expand=1797)
41047#[inline]
41048#[target_feature(enable = "avx512f")]
41049#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41050#[cfg_attr(test, assert_instr(vcvtsd2ss))]
41051pub fn _mm_mask_cvtsd_ss(src: __m128, k: __mmask8, a: __m128, b: __m128d) -> __m128 {
41052 unsafe {
41053 transmute(vcvtsd2ss(
41054 a.as_f32x4(),
41055 b.as_f64x2(),
41056 src.as_f32x4(),
41057 k,
41058 _MM_FROUND_CUR_DIRECTION,
41059 ))
41060 }
41061}
41062
41063/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
41064///
41065/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_cvtsd_ss&expand=1798)
41066#[inline]
41067#[target_feature(enable = "avx512f")]
41068#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41069#[cfg_attr(test, assert_instr(vcvtsd2ss))]
41070pub fn _mm_maskz_cvtsd_ss(k: __mmask8, a: __m128, b: __m128d) -> __m128 {
41071 unsafe {
41072 transmute(vcvtsd2ss(
41073 a.as_f32x4(),
41074 b.as_f64x2(),
41075 f32x4::ZERO,
41076 k,
41077 _MM_FROUND_CUR_DIRECTION,
41078 ))
41079 }
41080}
41081
41082/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
41083/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41084///
41085/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundss_sd&expand=1371)
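///
/// A minimal usage sketch (illustrative only, not compiled or run here: the
/// intrinsic is unstable and needs nightly Rust plus AVX-512F support at runtime).
/// Widening f32 to f64 is exact, so only exception suppression is configurable:
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm_set_pd(8.0, 0.0); // the upper lane of the result comes from `a`
///     let b = _mm_set_ss(1.5);
///     let r = _mm_cvt_roundss_sd::<{ _MM_FROUND_NO_EXC }>(a, b);
///     assert_eq!(_mm_cvtsd_f64(r), 1.5);
/// }
/// ```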
41086#[inline]
41087#[target_feature(enable = "avx512f")]
41088#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41089#[cfg_attr(test, assert_instr(vcvtss2sd, SAE = 8))]
41090#[rustc_legacy_const_generics(2)]
41091pub fn _mm_cvt_roundss_sd<const SAE: i32>(a: __m128d, b: __m128) -> __m128d {
41092 unsafe {
41093 static_assert_sae!(SAE);
41094 let a: f64x2 = a.as_f64x2();
41095 let b: f32x4 = b.as_f32x4();
41096 let r: f64x2 = vcvtss2sd(a, b, f64x2::ZERO, 0b11111111, SAE);
41097 transmute(r)
41098 }
41099}
41100
41101/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
41102/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41103///
41104/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_cvt_roundss_sd&expand=1372)
41105#[inline]
41106#[target_feature(enable = "avx512f")]
41107#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41108#[cfg_attr(test, assert_instr(vcvtss2sd, SAE = 8))]
41109#[rustc_legacy_const_generics(4)]
41110pub fn _mm_mask_cvt_roundss_sd<const SAE: i32>(
41111 src: __m128d,
41112 k: __mmask8,
41113 a: __m128d,
41114 b: __m128,
41115) -> __m128d {
41116 unsafe {
41117 static_assert_sae!(SAE);
41118 let a: f64x2 = a.as_f64x2();
41119 let b: f32x4 = b.as_f32x4();
41120 let src: f64x2 = src.as_f64x2();
41121 let r: f64x2 = vcvtss2sd(a, b, src, k, SAE);
41122 transmute(r)
41123 }
41124}
41125
41126/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
41127/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41128///
41129/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_cvt_roundss_sd&expand=1373)
41130#[inline]
41131#[target_feature(enable = "avx512f")]
41132#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41133#[cfg_attr(test, assert_instr(vcvtss2sd, SAE = 8))]
41134#[rustc_legacy_const_generics(3)]
41135pub fn _mm_maskz_cvt_roundss_sd<const SAE: i32>(k: __mmask8, a: __m128d, b: __m128) -> __m128d {
41136 unsafe {
41137 static_assert_sae!(SAE);
41138 let a: f64x2 = a.as_f64x2();
41139 let b: f32x4 = b.as_f32x4();
41140 let r: f64x2 = vcvtss2sd(a, b, f64x2::ZERO, k, SAE);
41141 transmute(r)
41142 }
41143}
41144
41145/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
41146/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41147/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41148/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41149/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41150/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41151/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41152///
41153/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundsd_ss&expand=1361)
41154#[inline]
41155#[target_feature(enable = "avx512f")]
41156#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41157#[cfg_attr(test, assert_instr(vcvtsd2ss, ROUNDING = 8))]
41158#[rustc_legacy_const_generics(2)]
41159pub fn _mm_cvt_roundsd_ss<const ROUNDING: i32>(a: __m128, b: __m128d) -> __m128 {
41160 unsafe {
41161 static_assert_rounding!(ROUNDING);
41162 let a: f32x4 = a.as_f32x4();
41163 let b: f64x2 = b.as_f64x2();
41164 let r: f32x4 = vcvtsd2ss(a, b, f32x4::ZERO, 0b11111111, ROUNDING);
41165 transmute(r)
41166 }
41167}
41168
41169/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
41170/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41171/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41172/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41173/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41174/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41175/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41176///
41177/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_cvt_roundsd_ss&expand=1362)
41178#[inline]
41179#[target_feature(enable = "avx512f")]
41180#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41181#[cfg_attr(test, assert_instr(vcvtsd2ss, ROUNDING = 8))]
41182#[rustc_legacy_const_generics(4)]
41183pub fn _mm_mask_cvt_roundsd_ss<const ROUNDING: i32>(
41184 src: __m128,
41185 k: __mmask8,
41186 a: __m128,
41187 b: __m128d,
41188) -> __m128 {
41189 unsafe {
41190 static_assert_rounding!(ROUNDING);
41191 let a: f32x4 = a.as_f32x4();
41192 let b: f64x2 = b.as_f64x2();
41193 let src: f32x4 = src.as_f32x4();
41194 let r: f32x4 = vcvtsd2ss(a, b, src, k, ROUNDING);
41195 transmute(r)
41196 }
41197}
41198
41199/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
41200/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41201/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41202/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41203/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41204/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41205/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41206///
41207/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_cvt_roundsd_ss&expand=1363)
41208#[inline]
41209#[target_feature(enable = "avx512f")]
41210#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41211#[cfg_attr(test, assert_instr(vcvtsd2ss, ROUNDING = 8))]
41212#[rustc_legacy_const_generics(3)]
41213pub fn _mm_maskz_cvt_roundsd_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128d) -> __m128 {
41214 unsafe {
41215 static_assert_rounding!(ROUNDING);
41216 let a: f32x4 = a.as_f32x4();
41217 let b: f64x2 = b.as_f64x2();
41218 let r: f32x4 = vcvtsd2ss(a, b, f32x4::ZERO, k, ROUNDING);
41219 transmute(r)
41220 }
41221}
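
// A minimal usage sketch (illustrative helper name, not from the original source):
// narrow the lower f64 of `b` to f32 while rounding toward negative infinity, with
// exceptions suppressed; the upper three f32 lanes come from `a`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_cvt_roundsd_ss() -> f32 {
    let a = _mm_setzero_ps();
    let b = _mm_set_sd(1.75);
    let r = _mm_cvt_roundsd_ss::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a, b);
    _mm_cvtss_f32(r) // 1.75 narrowed to f32 (exact here, so the rounding mode has no effect)
}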
41222
41223/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
41224/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41225/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41226/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41227/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41228/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41229/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41230///
41231/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundss_si32&expand=1374)
41232#[inline]
41233#[target_feature(enable = "avx512f")]
41234#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41235#[cfg_attr(test, assert_instr(vcvtss2si, ROUNDING = 8))]
41236#[rustc_legacy_const_generics(1)]
41237pub fn _mm_cvt_roundss_si32<const ROUNDING: i32>(a: __m128) -> i32 {
41238 unsafe {
41239 static_assert_rounding!(ROUNDING);
41240 let a: f32x4 = a.as_f32x4();
41241 vcvtss2si(a, ROUNDING)
41242 }
41243}
41244
41245/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
41246/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41247/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41248/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41249/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41250/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41251/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41252///
41253/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundss_i32&expand=1369)
41254#[inline]
41255#[target_feature(enable = "avx512f")]
41256#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41257#[cfg_attr(test, assert_instr(vcvtss2si, ROUNDING = 8))]
41258#[rustc_legacy_const_generics(1)]
41259pub fn _mm_cvt_roundss_i32<const ROUNDING: i32>(a: __m128) -> i32 {
41260 unsafe {
41261 static_assert_rounding!(ROUNDING);
41262 let a: f32x4 = a.as_f32x4();
41263 vcvtss2si(a, ROUNDING)
41264 }
41265}
41266
41267/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.\
41268/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41269/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41270/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41271/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41272/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41273/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41274///
41275/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundss_u32&expand=1376)
41276#[inline]
41277#[target_feature(enable = "avx512f")]
41278#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41279#[cfg_attr(test, assert_instr(vcvtss2usi, ROUNDING = 8))]
41280#[rustc_legacy_const_generics(1)]
41281pub fn _mm_cvt_roundss_u32<const ROUNDING: i32>(a: __m128) -> u32 {
41282 unsafe {
41283 static_assert_rounding!(ROUNDING);
41284 let a: f32x4 = a.as_f32x4();
41285 vcvtss2usi(a, ROUNDING)
41286 }
41287}
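
// A minimal usage sketch (illustrative only): the same scalar converted under two
// different rounding controls, showing how the ROUNDING parameter picks the mode.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_cvt_roundss_to_int() -> (i32, u32) {
    let a = _mm_set_ss(2.5);
    let down = _mm_cvt_roundss_si32::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a); // 2
    let up = _mm_cvt_roundss_u32::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a); // 3
    (down, up)
}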
41288
41289/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer, and store the result in dst.
41290///
41291/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtss_i32&expand=1893)
41292#[inline]
41293#[target_feature(enable = "avx512f")]
41294#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41295#[cfg_attr(test, assert_instr(vcvtss2si))]
41296pub fn _mm_cvtss_i32(a: __m128) -> i32 {
41297 unsafe { vcvtss2si(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) }
41298}
41299
41300/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.
41301///
41302/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtss_u32&expand=1901)
41303#[inline]
41304#[target_feature(enable = "avx512f")]
41305#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41306#[cfg_attr(test, assert_instr(vcvtss2usi))]
41307pub fn _mm_cvtss_u32(a: __m128) -> u32 {
41308 unsafe { vcvtss2usi(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) }
41309}
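
// A minimal usage sketch (illustrative only): the non-`round` variants carry no
// rounding operand, so the result follows the mode currently installed in MXCSR.RC
// (see `_MM_SET_ROUNDING_MODE`).
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_cvtss_current_direction() -> (i32, u32) {
    let a = _mm_set_ss(3.0);
    (_mm_cvtss_i32(a), _mm_cvtss_u32(a)) // (3, 3) regardless of the rounding mode
}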
41310
41311/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
41312/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41313/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41314/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41315/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41316/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41317/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41318///
41319/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundsd_si32&expand=1359)
41320#[inline]
41321#[target_feature(enable = "avx512f")]
41322#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41323#[cfg_attr(test, assert_instr(vcvtsd2si, ROUNDING = 8))]
41324#[rustc_legacy_const_generics(1)]
41325pub fn _mm_cvt_roundsd_si32<const ROUNDING: i32>(a: __m128d) -> i32 {
41326 unsafe {
41327 static_assert_rounding!(ROUNDING);
41328 let a: f64x2 = a.as_f64x2();
41329 vcvtsd2si(a, ROUNDING)
41330 }
41331}
41332
41333/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
41334/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41335/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41336/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41337/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41338/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41339/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41340///
41341/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundsd_i32&expand=1357)
41342#[inline]
41343#[target_feature(enable = "avx512f")]
41344#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41345#[cfg_attr(test, assert_instr(vcvtsd2si, ROUNDING = 8))]
41346#[rustc_legacy_const_generics(1)]
41347pub fn _mm_cvt_roundsd_i32<const ROUNDING: i32>(a: __m128d) -> i32 {
41348 unsafe {
41349 static_assert_rounding!(ROUNDING);
41350 let a: f64x2 = a.as_f64x2();
41351 vcvtsd2si(a, ROUNDING)
41352 }
41353}
41354
41355/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.\
41356/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41357/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41358/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41359/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41360/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41361/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41362///
41363/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvt_roundsd_u32&expand=1364)
41364#[inline]
41365#[target_feature(enable = "avx512f")]
41366#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41367#[cfg_attr(test, assert_instr(vcvtsd2usi, ROUNDING = 8))]
41368#[rustc_legacy_const_generics(1)]
41369pub fn _mm_cvt_roundsd_u32<const ROUNDING: i32>(a: __m128d) -> u32 {
41370 unsafe {
41371 static_assert_rounding!(ROUNDING);
41372 let a: f64x2 = a.as_f64x2();
41373 vcvtsd2usi(a, ROUNDING)
41374 }
41375}
41376
41377/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer, and store the result in dst.
41378///
41379/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtsd_i32&expand=1791)
41380#[inline]
41381#[target_feature(enable = "avx512f")]
41382#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41383#[cfg_attr(test, assert_instr(vcvtsd2si))]
41384pub fn _mm_cvtsd_i32(a: __m128d) -> i32 {
41385 unsafe { vcvtsd2si(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) }
41386}
41387
41388/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.
41389///
41390/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtsd_u32&expand=1799)
41391#[inline]
41392#[target_feature(enable = "avx512f")]
41393#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41394#[cfg_attr(test, assert_instr(vcvtsd2usi))]
41395pub fn _mm_cvtsd_u32(a: __m128d) -> u32 {
41396 unsafe { vcvtsd2usi(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) }
41397}
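
// A minimal usage sketch (illustrative only): the double-precision counterparts,
// once with an explicit rounding operand and once following MXCSR.RC.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_cvtsd_to_int() -> (i32, u32) {
    let explicit =
        _mm_cvt_roundsd_si32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(_mm_set_sd(-1.25)); // -1
    let current = _mm_cvtsd_u32(_mm_set_sd(7.0)); // 7 under any rounding mode
    (explicit, current)
}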
41398
41399/// Convert the signed 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
41400///
41401/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41402/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41403/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41404/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41405/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41406/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41407///
41408/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundi32_ss&expand=1312)
41409#[inline]
41410#[target_feature(enable = "avx512f")]
41411#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41412#[cfg_attr(test, assert_instr(vcvtsi2ss, ROUNDING = 8))]
41413#[rustc_legacy_const_generics(2)]
41414pub fn _mm_cvt_roundi32_ss<const ROUNDING: i32>(a: __m128, b: i32) -> __m128 {
41415 unsafe {
41416 static_assert_rounding!(ROUNDING);
41417 let a: f32x4 = a.as_f32x4();
41418 let r: f32x4 = vcvtsi2ss(a, b, ROUNDING);
41419 transmute(r)
41420 }
41421}
41422
41423/// Convert the signed 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
41424///
41425/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41426/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41427/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41428/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41429/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41430/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41431///
41432/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundsi32_ss&expand=1366)
41433#[inline]
41434#[target_feature(enable = "avx512f")]
41435#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41436#[cfg_attr(test, assert_instr(vcvtsi2ss, ROUNDING = 8))]
41437#[rustc_legacy_const_generics(2)]
41438pub fn _mm_cvt_roundsi32_ss<const ROUNDING: i32>(a: __m128, b: i32) -> __m128 {
41439 unsafe {
41440 static_assert_rounding!(ROUNDING);
41441 let a: f32x4 = a.as_f32x4();
41442 let r: f32x4 = vcvtsi2ss(a, b, ROUNDING);
41443 transmute(r)
41444 }
41445}
41446
41447/// Convert the unsigned 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
41448/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41449/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41450/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41451/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41452/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41453/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41454///
41455/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundu32_ss&expand=1378)
41456#[inline]
41457#[target_feature(enable = "avx512f")]
41458#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41459#[cfg_attr(test, assert_instr(vcvtusi2ss, ROUNDING = 8))]
41460#[rustc_legacy_const_generics(2)]
41461pub fn _mm_cvt_roundu32_ss<const ROUNDING: i32>(a: __m128, b: u32) -> __m128 {
41462 unsafe {
41463 static_assert_rounding!(ROUNDING);
41464 let a: f32x4 = a.as_f32x4();
41465 let r: f32x4 = vcvtusi2ss(a, b, ROUNDING);
41466 transmute(r)
41467 }
41468}
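
// A minimal usage sketch (illustrative only): an integer converted into the low f32
// lane under an explicit rounding mode; the mode only matters for values that are not
// exactly representable in f32, such as u32::MAX below.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_cvt_round_int_to_ss() -> f32 {
    let a = _mm_setzero_ps();
    let r = _mm_cvt_roundu32_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, u32::MAX);
    _mm_cvtss_f32(r) // u32::MAX rounded to the nearest representable f32 (4294967296.0)
}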
41469
41470/// Convert the signed 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
41471///
41472/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvti32_ss&expand=1643)
41473#[inline]
41474#[target_feature(enable = "avx512f")]
41475#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41476#[cfg_attr(test, assert_instr(vcvtsi2ss))]
41477pub fn _mm_cvti32_ss(a: __m128, b: i32) -> __m128 {
41478 unsafe {
41479 let b: f32 = b as f32;
41480 simd_insert!(a, 0, b)
41481 }
41482}
41483
41484/// Convert the signed 32-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
41485///
41486/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvti32_sd&expand=1642)
41487#[inline]
41488#[target_feature(enable = "avx512f")]
41489#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41490#[cfg_attr(test, assert_instr(vcvtsi2sd))]
41491pub fn _mm_cvti32_sd(a: __m128d, b: i32) -> __m128d {
41492 unsafe {
41493 let b: f64 = b as f64;
41494 simd_insert!(a, 0, b)
41495 }
41496}
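
// A minimal usage sketch (illustrative only): the plain integer-to-scalar conversions
// replace only the lowest lane and leave the remaining lanes of `a` untouched.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_cvti32_scalar() -> (f32, f64) {
    let ps = _mm_cvti32_ss(_mm_set1_ps(9.0), -3);
    let pd = _mm_cvti32_sd(_mm_set1_pd(9.0), -3);
    (_mm_cvtss_f32(ps), _mm_cvtsd_f64(pd)) // (-3.0, -3.0); the upper lanes keep 9.0
}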
41497
41498/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
41499/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41500///
41501/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtt_roundss_si32&expand=1936)
41502#[inline]
41503#[target_feature(enable = "avx512f")]
41504#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41505#[cfg_attr(test, assert_instr(vcvttss2si, SAE = 8))]
41506#[rustc_legacy_const_generics(1)]
41507pub fn _mm_cvtt_roundss_si32<const SAE: i32>(a: __m128) -> i32 {
41508 unsafe {
41509 static_assert_sae!(SAE);
41510 let a: f32x4 = a.as_f32x4();
41511 vcvttss2si(a, SAE)
41512 }
41513}
41514
41515/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
41516/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41517///
41518/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtt_roundss_i32&expand=1934)
41519#[inline]
41520#[target_feature(enable = "avx512f")]
41521#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41522#[cfg_attr(test, assert_instr(vcvttss2si, SAE = 8))]
41523#[rustc_legacy_const_generics(1)]
41524pub fn _mm_cvtt_roundss_i32<const SAE: i32>(a: __m128) -> i32 {
41525 unsafe {
41526 static_assert_sae!(SAE);
41527 let a: f32x4 = a.as_f32x4();
41528 vcvttss2si(a, SAE)
41529 }
41530}
41531
41532/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.\
41533/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41534///
41535/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtt_roundss_u32&expand=1938)
41536#[inline]
41537#[target_feature(enable = "avx512f")]
41538#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41539#[cfg_attr(test, assert_instr(vcvttss2usi, SAE = 8))]
41540#[rustc_legacy_const_generics(1)]
41541pub fn _mm_cvtt_roundss_u32<const SAE: i32>(a: __m128) -> u32 {
41542 unsafe {
41543 static_assert_sae!(SAE);
41544 let a: f32x4 = a.as_f32x4();
41545 vcvttss2usi(a, SAE)
41546 }
41547}
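
// A minimal usage sketch (illustrative only): the truncating (`tt`) variants always
// round toward zero; the SAE parameter only chooses whether exceptions are suppressed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_cvtt_roundss() -> (i32, u32) {
    let a = _mm_set_ss(2.9);
    let i = _mm_cvtt_roundss_i32::<_MM_FROUND_NO_EXC>(a); // 2, fraction discarded
    let u = _mm_cvtt_roundss_u32::<_MM_FROUND_CUR_DIRECTION>(a); // also 2
    (i, u)
}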
41548
41549/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.
41550///
41551/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttss_i32&expand=2022)
41552#[inline]
41553#[target_feature(enable = "avx512f")]
41554#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41555#[cfg_attr(test, assert_instr(vcvttss2si))]
41556pub fn _mm_cvttss_i32(a: __m128) -> i32 {
41557 unsafe { vcvttss2si(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) }
41558}
41559
41560/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.
41561///
41562/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttss_u32&expand=2026)
41563#[inline]
41564#[target_feature(enable = "avx512f")]
41565#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41566#[cfg_attr(test, assert_instr(vcvttss2usi))]
41567pub fn _mm_cvttss_u32(a: __m128) -> u32 {
41568 unsafe { vcvttss2usi(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) }
41569}
41570
41571/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
41572/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41573///
41574/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtt_roundsd_si32&expand=1930)
41575#[inline]
41576#[target_feature(enable = "avx512f")]
41577#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41578#[cfg_attr(test, assert_instr(vcvttsd2si, SAE = 8))]
41579#[rustc_legacy_const_generics(1)]
41580pub fn _mm_cvtt_roundsd_si32<const SAE: i32>(a: __m128d) -> i32 {
41581 unsafe {
41582 static_assert_sae!(SAE);
41583 let a: f64x2 = a.as_f64x2();
41584 vcvttsd2si(a, SAE)
41585 }
41586}
41587
41588/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
41589/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41590///
41591/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtt_roundsd_i32&expand=1928)
41592#[inline]
41593#[target_feature(enable = "avx512f")]
41594#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41595#[cfg_attr(test, assert_instr(vcvttsd2si, SAE = 8))]
41596#[rustc_legacy_const_generics(1)]
41597pub fn _mm_cvtt_roundsd_i32<const SAE: i32>(a: __m128d) -> i32 {
41598 unsafe {
41599 static_assert_sae!(SAE);
41600 let a: f64x2 = a.as_f64x2();
41601 vcvttsd2si(a, SAE)
41602 }
41603}
41604
41605/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.\
41606/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41607///
41608/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtt_roundsd_u32&expand=1932)
41609#[inline]
41610#[target_feature(enable = "avx512f")]
41611#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41612#[cfg_attr(test, assert_instr(vcvttsd2usi, SAE = 8))]
41613#[rustc_legacy_const_generics(1)]
41614pub fn _mm_cvtt_roundsd_u32<const SAE: i32>(a: __m128d) -> u32 {
41615 unsafe {
41616 static_assert_sae!(SAE);
41617 let a: f64x2 = a.as_f64x2();
41618 vcvttsd2usi(a, SAE)
41619 }
41620}
41621
41622/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.
41623///
41624/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttsd_i32&expand=2015)
41625#[inline]
41626#[target_feature(enable = "avx512f")]
41627#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41628#[cfg_attr(test, assert_instr(vcvttsd2si))]
41629pub fn _mm_cvttsd_i32(a: __m128d) -> i32 {
41630 unsafe { vcvttsd2si(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) }
41631}
41632
41633/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.
41634///
41635/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttsd_u32&expand=2020)
41636#[inline]
41637#[target_feature(enable = "avx512f")]
41638#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41639#[cfg_attr(test, assert_instr(vcvttsd2usi))]
41640pub fn _mm_cvttsd_u32(a: __m128d) -> u32 {
41641 unsafe { vcvttsd2usi(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) }
41642}
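
// A minimal usage sketch (illustrative only): the non-`round` truncating forms take no
// control operand at all; truncation toward zero is independent of MXCSR.RC.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_cvtt_scalar() -> (i32, u32) {
    (_mm_cvttss_i32(_mm_set_ss(-2.9)), _mm_cvttsd_u32(_mm_set_sd(2.9))) // (-2, 2)
}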
41643
41644/// Convert the unsigned 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
41645///
41646/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtu32_ss&expand=2032)
41647#[inline]
41648#[target_feature(enable = "avx512f")]
41649#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41650#[cfg_attr(test, assert_instr(vcvtusi2ss))]
41651pub fn _mm_cvtu32_ss(a: __m128, b: u32) -> __m128 {
41652 unsafe {
41653 let b: f32 = b as f32;
41654 simd_insert!(a, 0, b)
41655 }
41656}
41657
41658/// Convert the unsigned 32-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
41659///
41660/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtu32_sd&expand=2031)
41661#[inline]
41662#[target_feature(enable = "avx512f")]
41663#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41664#[cfg_attr(test, assert_instr(vcvtusi2sd))]
41665pub fn _mm_cvtu32_sd(a: __m128d, b: u32) -> __m128d {
41666 unsafe {
41667 let b: f64 = b as f64;
41668 simd_insert!(a, 0, b)
41669 }
41670}
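
// A minimal usage sketch (illustrative only): the `u32` variants treat the integer as
// unsigned, so values above `i32::MAX` convert to large positive floats rather than
// being reinterpreted as negative.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_cvtu32_scalar() -> (f32, f64) {
    let ps = _mm_cvtu32_ss(_mm_setzero_ps(), 4_000_000_000);
    let pd = _mm_cvtu32_sd(_mm_setzero_pd(), 4_000_000_000);
    (_mm_cvtss_f32(ps), _mm_cvtsd_f64(pd)) // both approximately 4.0e9, not negative
}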
41671
41672/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and return the boolean result (0 or 1).\
41673/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41674///
41675/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comi_round_ss&expand=1175)
41676#[inline]
41677#[target_feature(enable = "avx512f")]
41678#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41679#[cfg_attr(test, assert_instr(vcmp, IMM5 = 5, SAE = 4))] //should be vcomiss
41680#[rustc_legacy_const_generics(2, 3)]
41681pub fn _mm_comi_round_ss<const IMM5: i32, const SAE: i32>(a: __m128, b: __m128) -> i32 {
41682 unsafe {
41683 static_assert_uimm_bits!(IMM5, 5);
41684 static_assert_mantissas_sae!(SAE);
41685 let a: f32x4 = a.as_f32x4();
41686 let b: f32x4 = b.as_f32x4();
41687 vcomiss(a, b, IMM5, SAE)
41688 }
41689}
41690
41691/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and return the boolean result (0 or 1).\
41692/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41693///
41694/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comi_round_sd&expand=1174)
41695#[inline]
41696#[target_feature(enable = "avx512f")]
41697#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41698#[cfg_attr(test, assert_instr(vcmp, IMM5 = 5, SAE = 4))] //should be vcomisd
41699#[rustc_legacy_const_generics(2, 3)]
41700pub fn _mm_comi_round_sd<const IMM5: i32, const SAE: i32>(a: __m128d, b: __m128d) -> i32 {
41701 unsafe {
41702 static_assert_uimm_bits!(IMM5, 5);
41703 static_assert_mantissas_sae!(SAE);
41704 let a: f64x2 = a.as_f64x2();
41705 let b: f64x2 = b.as_f64x2();
41706 vcomisd(a, b, IMM5, SAE)
41707 }
41708}
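
// A minimal usage sketch (illustrative only): IMM5 selects the comparison predicate
// (here `_CMP_LE_OQ`) and the result comes back as 0 or 1 rather than as a mask.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_comi_round_ss() -> i32 {
    let a = _mm_set_ss(1.0);
    let b = _mm_set_ss(2.0);
    _mm_comi_round_ss::<_CMP_LE_OQ, _MM_FROUND_NO_EXC>(a, b) // 1, since 1.0 <= 2.0
}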
41709
41710/// Equal
41711#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41712pub const _MM_CMPINT_EQ: _MM_CMPINT_ENUM = 0x00;
41713/// Less-than
41714#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41715pub const _MM_CMPINT_LT: _MM_CMPINT_ENUM = 0x01;
41716/// Less-than-or-equal
41717#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41718pub const _MM_CMPINT_LE: _MM_CMPINT_ENUM = 0x02;
41719/// False
41720#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41721pub const _MM_CMPINT_FALSE: _MM_CMPINT_ENUM = 0x03;
41722/// Not-equal
41723#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41724pub const _MM_CMPINT_NE: _MM_CMPINT_ENUM = 0x04;
41725/// Not less-than
41726#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41727pub const _MM_CMPINT_NLT: _MM_CMPINT_ENUM = 0x05;
41728/// Not less-than-or-equal
41729#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41730pub const _MM_CMPINT_NLE: _MM_CMPINT_ENUM = 0x06;
41731/// True
41732#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41733pub const _MM_CMPINT_TRUE: _MM_CMPINT_ENUM = 0x07;
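
// A minimal usage sketch (illustrative only): the `_MM_CMPINT_*` values are the IMM3
// predicates accepted by the integer compare-to-mask intrinsics in this module.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_cmpint_predicate() -> __mmask16 {
    let a = _mm512_set1_epi32(1);
    let b = _mm512_set1_epi32(2);
    _mm512_cmp_epi32_mask::<_MM_CMPINT_LT>(a, b) // 0xFFFF: every lane satisfies 1 < 2
}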
41734
41735/// interval [1, 2)
41736#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41737pub const _MM_MANT_NORM_1_2: _MM_MANTISSA_NORM_ENUM = 0x00;
41738/// interval [0.5, 2)
41739#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41740pub const _MM_MANT_NORM_P5_2: _MM_MANTISSA_NORM_ENUM = 0x01;
41741/// interval [0.5, 1)
41742#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41743pub const _MM_MANT_NORM_P5_1: _MM_MANTISSA_NORM_ENUM = 0x02;
41744/// interval [0.75, 1.5)
41745#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41746pub const _MM_MANT_NORM_P75_1P5: _MM_MANTISSA_NORM_ENUM = 0x03;
41747
41748/// sign = sign(SRC)
41749#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41750pub const _MM_MANT_SIGN_SRC: _MM_MANTISSA_SIGN_ENUM = 0x00;
41751/// sign = 0
41752#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41753pub const _MM_MANT_SIGN_ZERO: _MM_MANTISSA_SIGN_ENUM = 0x01;
41754/// DEST = NaN if sign(SRC) = 1
41755#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41756pub const _MM_MANT_SIGN_NAN: _MM_MANTISSA_SIGN_ENUM = 0x02;
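
// A minimal usage sketch (illustrative only): the normalization-interval and sign
// enums are consumed as the two const parameters of the `getmant` family.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_getmant_enums() -> __m512 {
    let a = _mm512_set1_ps(-12.0);
    // Mantissas normalized to [1, 2) with the sign forced to zero: 1.5 in every lane.
    _mm512_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_ZERO>(a)
}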
41757
41758#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41759pub const _MM_PERM_AAAA: _MM_PERM_ENUM = 0x00;
41760#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41761pub const _MM_PERM_AAAB: _MM_PERM_ENUM = 0x01;
41762#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41763pub const _MM_PERM_AAAC: _MM_PERM_ENUM = 0x02;
41764#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41765pub const _MM_PERM_AAAD: _MM_PERM_ENUM = 0x03;
41766#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41767pub const _MM_PERM_AABA: _MM_PERM_ENUM = 0x04;
41768#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41769pub const _MM_PERM_AABB: _MM_PERM_ENUM = 0x05;
41770#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41771pub const _MM_PERM_AABC: _MM_PERM_ENUM = 0x06;
41772#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41773pub const _MM_PERM_AABD: _MM_PERM_ENUM = 0x07;
41774#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41775pub const _MM_PERM_AACA: _MM_PERM_ENUM = 0x08;
41776#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41777pub const _MM_PERM_AACB: _MM_PERM_ENUM = 0x09;
41778#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41779pub const _MM_PERM_AACC: _MM_PERM_ENUM = 0x0A;
41780#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41781pub const _MM_PERM_AACD: _MM_PERM_ENUM = 0x0B;
41782#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41783pub const _MM_PERM_AADA: _MM_PERM_ENUM = 0x0C;
41784#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41785pub const _MM_PERM_AADB: _MM_PERM_ENUM = 0x0D;
41786#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41787pub const _MM_PERM_AADC: _MM_PERM_ENUM = 0x0E;
41788#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41789pub const _MM_PERM_AADD: _MM_PERM_ENUM = 0x0F;
41790#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41791pub const _MM_PERM_ABAA: _MM_PERM_ENUM = 0x10;
41792#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41793pub const _MM_PERM_ABAB: _MM_PERM_ENUM = 0x11;
41794#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41795pub const _MM_PERM_ABAC: _MM_PERM_ENUM = 0x12;
41796#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41797pub const _MM_PERM_ABAD: _MM_PERM_ENUM = 0x13;
41798#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41799pub const _MM_PERM_ABBA: _MM_PERM_ENUM = 0x14;
41800#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41801pub const _MM_PERM_ABBB: _MM_PERM_ENUM = 0x15;
41802#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41803pub const _MM_PERM_ABBC: _MM_PERM_ENUM = 0x16;
41804#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41805pub const _MM_PERM_ABBD: _MM_PERM_ENUM = 0x17;
41806#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41807pub const _MM_PERM_ABCA: _MM_PERM_ENUM = 0x18;
41808#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41809pub const _MM_PERM_ABCB: _MM_PERM_ENUM = 0x19;
41810#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41811pub const _MM_PERM_ABCC: _MM_PERM_ENUM = 0x1A;
41812#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41813pub const _MM_PERM_ABCD: _MM_PERM_ENUM = 0x1B;
41814#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41815pub const _MM_PERM_ABDA: _MM_PERM_ENUM = 0x1C;
41816#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41817pub const _MM_PERM_ABDB: _MM_PERM_ENUM = 0x1D;
41818#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41819pub const _MM_PERM_ABDC: _MM_PERM_ENUM = 0x1E;
41820#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41821pub const _MM_PERM_ABDD: _MM_PERM_ENUM = 0x1F;
41822#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41823pub const _MM_PERM_ACAA: _MM_PERM_ENUM = 0x20;
41824#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41825pub const _MM_PERM_ACAB: _MM_PERM_ENUM = 0x21;
41826#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41827pub const _MM_PERM_ACAC: _MM_PERM_ENUM = 0x22;
41828#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41829pub const _MM_PERM_ACAD: _MM_PERM_ENUM = 0x23;
41830#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41831pub const _MM_PERM_ACBA: _MM_PERM_ENUM = 0x24;
41832#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41833pub const _MM_PERM_ACBB: _MM_PERM_ENUM = 0x25;
41834#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41835pub const _MM_PERM_ACBC: _MM_PERM_ENUM = 0x26;
41836#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41837pub const _MM_PERM_ACBD: _MM_PERM_ENUM = 0x27;
41838#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41839pub const _MM_PERM_ACCA: _MM_PERM_ENUM = 0x28;
41840#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41841pub const _MM_PERM_ACCB: _MM_PERM_ENUM = 0x29;
41842#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41843pub const _MM_PERM_ACCC: _MM_PERM_ENUM = 0x2A;
41844#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41845pub const _MM_PERM_ACCD: _MM_PERM_ENUM = 0x2B;
41846#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41847pub const _MM_PERM_ACDA: _MM_PERM_ENUM = 0x2C;
41848#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41849pub const _MM_PERM_ACDB: _MM_PERM_ENUM = 0x2D;
41850#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41851pub const _MM_PERM_ACDC: _MM_PERM_ENUM = 0x2E;
41852#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41853pub const _MM_PERM_ACDD: _MM_PERM_ENUM = 0x2F;
41854#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41855pub const _MM_PERM_ADAA: _MM_PERM_ENUM = 0x30;
41856#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41857pub const _MM_PERM_ADAB: _MM_PERM_ENUM = 0x31;
41858#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41859pub const _MM_PERM_ADAC: _MM_PERM_ENUM = 0x32;
41860#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41861pub const _MM_PERM_ADAD: _MM_PERM_ENUM = 0x33;
41862#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41863pub const _MM_PERM_ADBA: _MM_PERM_ENUM = 0x34;
41864#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41865pub const _MM_PERM_ADBB: _MM_PERM_ENUM = 0x35;
41866#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41867pub const _MM_PERM_ADBC: _MM_PERM_ENUM = 0x36;
41868#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41869pub const _MM_PERM_ADBD: _MM_PERM_ENUM = 0x37;
41870#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41871pub const _MM_PERM_ADCA: _MM_PERM_ENUM = 0x38;
41872#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41873pub const _MM_PERM_ADCB: _MM_PERM_ENUM = 0x39;
41874#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41875pub const _MM_PERM_ADCC: _MM_PERM_ENUM = 0x3A;
41876#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41877pub const _MM_PERM_ADCD: _MM_PERM_ENUM = 0x3B;
41878#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41879pub const _MM_PERM_ADDA: _MM_PERM_ENUM = 0x3C;
41880#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41881pub const _MM_PERM_ADDB: _MM_PERM_ENUM = 0x3D;
41882#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41883pub const _MM_PERM_ADDC: _MM_PERM_ENUM = 0x3E;
41884#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41885pub const _MM_PERM_ADDD: _MM_PERM_ENUM = 0x3F;
41886#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41887pub const _MM_PERM_BAAA: _MM_PERM_ENUM = 0x40;
41888#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41889pub const _MM_PERM_BAAB: _MM_PERM_ENUM = 0x41;
41890#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41891pub const _MM_PERM_BAAC: _MM_PERM_ENUM = 0x42;
41892#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41893pub const _MM_PERM_BAAD: _MM_PERM_ENUM = 0x43;
41894#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41895pub const _MM_PERM_BABA: _MM_PERM_ENUM = 0x44;
41896#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41897pub const _MM_PERM_BABB: _MM_PERM_ENUM = 0x45;
41898#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41899pub const _MM_PERM_BABC: _MM_PERM_ENUM = 0x46;
41900#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41901pub const _MM_PERM_BABD: _MM_PERM_ENUM = 0x47;
41902#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41903pub const _MM_PERM_BACA: _MM_PERM_ENUM = 0x48;
41904#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41905pub const _MM_PERM_BACB: _MM_PERM_ENUM = 0x49;
41906#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41907pub const _MM_PERM_BACC: _MM_PERM_ENUM = 0x4A;
41908#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41909pub const _MM_PERM_BACD: _MM_PERM_ENUM = 0x4B;
41910#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41911pub const _MM_PERM_BADA: _MM_PERM_ENUM = 0x4C;
41912#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41913pub const _MM_PERM_BADB: _MM_PERM_ENUM = 0x4D;
41914#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41915pub const _MM_PERM_BADC: _MM_PERM_ENUM = 0x4E;
41916#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41917pub const _MM_PERM_BADD: _MM_PERM_ENUM = 0x4F;
41918#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41919pub const _MM_PERM_BBAA: _MM_PERM_ENUM = 0x50;
41920#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41921pub const _MM_PERM_BBAB: _MM_PERM_ENUM = 0x51;
41922#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41923pub const _MM_PERM_BBAC: _MM_PERM_ENUM = 0x52;
41924#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41925pub const _MM_PERM_BBAD: _MM_PERM_ENUM = 0x53;
41926#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41927pub const _MM_PERM_BBBA: _MM_PERM_ENUM = 0x54;
41928#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41929pub const _MM_PERM_BBBB: _MM_PERM_ENUM = 0x55;
41930#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41931pub const _MM_PERM_BBBC: _MM_PERM_ENUM = 0x56;
41932#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41933pub const _MM_PERM_BBBD: _MM_PERM_ENUM = 0x57;
41934#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41935pub const _MM_PERM_BBCA: _MM_PERM_ENUM = 0x58;
41936#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41937pub const _MM_PERM_BBCB: _MM_PERM_ENUM = 0x59;
41938#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41939pub const _MM_PERM_BBCC: _MM_PERM_ENUM = 0x5A;
41940#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41941pub const _MM_PERM_BBCD: _MM_PERM_ENUM = 0x5B;
41942#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41943pub const _MM_PERM_BBDA: _MM_PERM_ENUM = 0x5C;
41944#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41945pub const _MM_PERM_BBDB: _MM_PERM_ENUM = 0x5D;
41946#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41947pub const _MM_PERM_BBDC: _MM_PERM_ENUM = 0x5E;
41948#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41949pub const _MM_PERM_BBDD: _MM_PERM_ENUM = 0x5F;
41950#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41951pub const _MM_PERM_BCAA: _MM_PERM_ENUM = 0x60;
41952#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41953pub const _MM_PERM_BCAB: _MM_PERM_ENUM = 0x61;
41954#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41955pub const _MM_PERM_BCAC: _MM_PERM_ENUM = 0x62;
41956#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41957pub const _MM_PERM_BCAD: _MM_PERM_ENUM = 0x63;
41958#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41959pub const _MM_PERM_BCBA: _MM_PERM_ENUM = 0x64;
41960#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41961pub const _MM_PERM_BCBB: _MM_PERM_ENUM = 0x65;
41962#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41963pub const _MM_PERM_BCBC: _MM_PERM_ENUM = 0x66;
41964#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41965pub const _MM_PERM_BCBD: _MM_PERM_ENUM = 0x67;
41966#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41967pub const _MM_PERM_BCCA: _MM_PERM_ENUM = 0x68;
41968#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41969pub const _MM_PERM_BCCB: _MM_PERM_ENUM = 0x69;
41970#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41971pub const _MM_PERM_BCCC: _MM_PERM_ENUM = 0x6A;
41972#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41973pub const _MM_PERM_BCCD: _MM_PERM_ENUM = 0x6B;
41974#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41975pub const _MM_PERM_BCDA: _MM_PERM_ENUM = 0x6C;
41976#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41977pub const _MM_PERM_BCDB: _MM_PERM_ENUM = 0x6D;
41978#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41979pub const _MM_PERM_BCDC: _MM_PERM_ENUM = 0x6E;
41980#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41981pub const _MM_PERM_BCDD: _MM_PERM_ENUM = 0x6F;
41982#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41983pub const _MM_PERM_BDAA: _MM_PERM_ENUM = 0x70;
41984#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41985pub const _MM_PERM_BDAB: _MM_PERM_ENUM = 0x71;
41986#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41987pub const _MM_PERM_BDAC: _MM_PERM_ENUM = 0x72;
41988#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41989pub const _MM_PERM_BDAD: _MM_PERM_ENUM = 0x73;
41990#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41991pub const _MM_PERM_BDBA: _MM_PERM_ENUM = 0x74;
41992#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41993pub const _MM_PERM_BDBB: _MM_PERM_ENUM = 0x75;
41994#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41995pub const _MM_PERM_BDBC: _MM_PERM_ENUM = 0x76;
41996#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41997pub const _MM_PERM_BDBD: _MM_PERM_ENUM = 0x77;
41998#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41999pub const _MM_PERM_BDCA: _MM_PERM_ENUM = 0x78;
42000#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42001pub const _MM_PERM_BDCB: _MM_PERM_ENUM = 0x79;
42002#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42003pub const _MM_PERM_BDCC: _MM_PERM_ENUM = 0x7A;
42004#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42005pub const _MM_PERM_BDCD: _MM_PERM_ENUM = 0x7B;
42006#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42007pub const _MM_PERM_BDDA: _MM_PERM_ENUM = 0x7C;
42008#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42009pub const _MM_PERM_BDDB: _MM_PERM_ENUM = 0x7D;
42010#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42011pub const _MM_PERM_BDDC: _MM_PERM_ENUM = 0x7E;
42012#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42013pub const _MM_PERM_BDDD: _MM_PERM_ENUM = 0x7F;
42014#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42015pub const _MM_PERM_CAAA: _MM_PERM_ENUM = 0x80;
42016#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42017pub const _MM_PERM_CAAB: _MM_PERM_ENUM = 0x81;
42018#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42019pub const _MM_PERM_CAAC: _MM_PERM_ENUM = 0x82;
42020#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42021pub const _MM_PERM_CAAD: _MM_PERM_ENUM = 0x83;
42022#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42023pub const _MM_PERM_CABA: _MM_PERM_ENUM = 0x84;
42024#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42025pub const _MM_PERM_CABB: _MM_PERM_ENUM = 0x85;
42026#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42027pub const _MM_PERM_CABC: _MM_PERM_ENUM = 0x86;
42028#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42029pub const _MM_PERM_CABD: _MM_PERM_ENUM = 0x87;
42030#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42031pub const _MM_PERM_CACA: _MM_PERM_ENUM = 0x88;
42032#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42033pub const _MM_PERM_CACB: _MM_PERM_ENUM = 0x89;
42034#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42035pub const _MM_PERM_CACC: _MM_PERM_ENUM = 0x8A;
42036#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42037pub const _MM_PERM_CACD: _MM_PERM_ENUM = 0x8B;
42038#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42039pub const _MM_PERM_CADA: _MM_PERM_ENUM = 0x8C;
42040#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42041pub const _MM_PERM_CADB: _MM_PERM_ENUM = 0x8D;
42042#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42043pub const _MM_PERM_CADC: _MM_PERM_ENUM = 0x8E;
42044#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42045pub const _MM_PERM_CADD: _MM_PERM_ENUM = 0x8F;
42046#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42047pub const _MM_PERM_CBAA: _MM_PERM_ENUM = 0x90;
42048#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42049pub const _MM_PERM_CBAB: _MM_PERM_ENUM = 0x91;
42050#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42051pub const _MM_PERM_CBAC: _MM_PERM_ENUM = 0x92;
42052#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42053pub const _MM_PERM_CBAD: _MM_PERM_ENUM = 0x93;
42054#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42055pub const _MM_PERM_CBBA: _MM_PERM_ENUM = 0x94;
42056#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42057pub const _MM_PERM_CBBB: _MM_PERM_ENUM = 0x95;
42058#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42059pub const _MM_PERM_CBBC: _MM_PERM_ENUM = 0x96;
42060#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42061pub const _MM_PERM_CBBD: _MM_PERM_ENUM = 0x97;
42062#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42063pub const _MM_PERM_CBCA: _MM_PERM_ENUM = 0x98;
42064#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42065pub const _MM_PERM_CBCB: _MM_PERM_ENUM = 0x99;
42066#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42067pub const _MM_PERM_CBCC: _MM_PERM_ENUM = 0x9A;
42068#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42069pub const _MM_PERM_CBCD: _MM_PERM_ENUM = 0x9B;
42070#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42071pub const _MM_PERM_CBDA: _MM_PERM_ENUM = 0x9C;
42072#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42073pub const _MM_PERM_CBDB: _MM_PERM_ENUM = 0x9D;
42074#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_CBDC: _MM_PERM_ENUM = 0x9E;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_CBDD: _MM_PERM_ENUM = 0x9F;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_CCAA: _MM_PERM_ENUM = 0xA0;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_CCAB: _MM_PERM_ENUM = 0xA1;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_CCAC: _MM_PERM_ENUM = 0xA2;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_CCAD: _MM_PERM_ENUM = 0xA3;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_CCBA: _MM_PERM_ENUM = 0xA4;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_CCBB: _MM_PERM_ENUM = 0xA5;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_CCBC: _MM_PERM_ENUM = 0xA6;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_CCBD: _MM_PERM_ENUM = 0xA7;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_CCCA: _MM_PERM_ENUM = 0xA8;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_CCCB: _MM_PERM_ENUM = 0xA9;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_CCCC: _MM_PERM_ENUM = 0xAA;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_CCCD: _MM_PERM_ENUM = 0xAB;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_CCDA: _MM_PERM_ENUM = 0xAC;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_CCDB: _MM_PERM_ENUM = 0xAD;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_CCDC: _MM_PERM_ENUM = 0xAE;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_CCDD: _MM_PERM_ENUM = 0xAF;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_CDAA: _MM_PERM_ENUM = 0xB0;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_CDAB: _MM_PERM_ENUM = 0xB1;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_CDAC: _MM_PERM_ENUM = 0xB2;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_CDAD: _MM_PERM_ENUM = 0xB3;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_CDBA: _MM_PERM_ENUM = 0xB4;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_CDBB: _MM_PERM_ENUM = 0xB5;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_CDBC: _MM_PERM_ENUM = 0xB6;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_CDBD: _MM_PERM_ENUM = 0xB7;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_CDCA: _MM_PERM_ENUM = 0xB8;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_CDCB: _MM_PERM_ENUM = 0xB9;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_CDCC: _MM_PERM_ENUM = 0xBA;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_CDCD: _MM_PERM_ENUM = 0xBB;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_CDDA: _MM_PERM_ENUM = 0xBC;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_CDDB: _MM_PERM_ENUM = 0xBD;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_CDDC: _MM_PERM_ENUM = 0xBE;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_CDDD: _MM_PERM_ENUM = 0xBF;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DAAA: _MM_PERM_ENUM = 0xC0;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DAAB: _MM_PERM_ENUM = 0xC1;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DAAC: _MM_PERM_ENUM = 0xC2;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DAAD: _MM_PERM_ENUM = 0xC3;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DABA: _MM_PERM_ENUM = 0xC4;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DABB: _MM_PERM_ENUM = 0xC5;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DABC: _MM_PERM_ENUM = 0xC6;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DABD: _MM_PERM_ENUM = 0xC7;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DACA: _MM_PERM_ENUM = 0xC8;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DACB: _MM_PERM_ENUM = 0xC9;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DACC: _MM_PERM_ENUM = 0xCA;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DACD: _MM_PERM_ENUM = 0xCB;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DADA: _MM_PERM_ENUM = 0xCC;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DADB: _MM_PERM_ENUM = 0xCD;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DADC: _MM_PERM_ENUM = 0xCE;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DADD: _MM_PERM_ENUM = 0xCF;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DBAA: _MM_PERM_ENUM = 0xD0;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DBAB: _MM_PERM_ENUM = 0xD1;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DBAC: _MM_PERM_ENUM = 0xD2;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DBAD: _MM_PERM_ENUM = 0xD3;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DBBA: _MM_PERM_ENUM = 0xD4;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DBBB: _MM_PERM_ENUM = 0xD5;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DBBC: _MM_PERM_ENUM = 0xD6;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DBBD: _MM_PERM_ENUM = 0xD7;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DBCA: _MM_PERM_ENUM = 0xD8;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DBCB: _MM_PERM_ENUM = 0xD9;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DBCC: _MM_PERM_ENUM = 0xDA;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DBCD: _MM_PERM_ENUM = 0xDB;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DBDA: _MM_PERM_ENUM = 0xDC;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DBDB: _MM_PERM_ENUM = 0xDD;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DBDC: _MM_PERM_ENUM = 0xDE;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DBDD: _MM_PERM_ENUM = 0xDF;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DCAA: _MM_PERM_ENUM = 0xE0;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DCAB: _MM_PERM_ENUM = 0xE1;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DCAC: _MM_PERM_ENUM = 0xE2;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DCAD: _MM_PERM_ENUM = 0xE3;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DCBA: _MM_PERM_ENUM = 0xE4;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DCBB: _MM_PERM_ENUM = 0xE5;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DCBC: _MM_PERM_ENUM = 0xE6;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DCBD: _MM_PERM_ENUM = 0xE7;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DCCA: _MM_PERM_ENUM = 0xE8;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DCCB: _MM_PERM_ENUM = 0xE9;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DCCC: _MM_PERM_ENUM = 0xEA;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DCCD: _MM_PERM_ENUM = 0xEB;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DCDA: _MM_PERM_ENUM = 0xEC;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DCDB: _MM_PERM_ENUM = 0xED;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DCDC: _MM_PERM_ENUM = 0xEE;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DCDD: _MM_PERM_ENUM = 0xEF;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DDAA: _MM_PERM_ENUM = 0xF0;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DDAB: _MM_PERM_ENUM = 0xF1;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DDAC: _MM_PERM_ENUM = 0xF2;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DDAD: _MM_PERM_ENUM = 0xF3;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DDBA: _MM_PERM_ENUM = 0xF4;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DDBB: _MM_PERM_ENUM = 0xF5;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DDBC: _MM_PERM_ENUM = 0xF6;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DDBD: _MM_PERM_ENUM = 0xF7;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DDCA: _MM_PERM_ENUM = 0xF8;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DDCB: _MM_PERM_ENUM = 0xF9;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DDCC: _MM_PERM_ENUM = 0xFA;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DDCD: _MM_PERM_ENUM = 0xFB;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DDDA: _MM_PERM_ENUM = 0xFC;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DDDB: _MM_PERM_ENUM = 0xFD;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DDDC: _MM_PERM_ENUM = 0xFE;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DDDD: _MM_PERM_ENUM = 0xFF;
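// Each `_MM_PERM_*` constant packs four 2-bit dword selectors (A = 0b00 .. D = 0b11),
// most-significant selector first, so e.g. `_MM_PERM_DCBA` (0xE4) is the identity
// permutation. The values are meant to be passed as the `MASK` immediate of dword
// shuffles such as `_mm512_shuffle_epi32`.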

#[allow(improper_ctypes)]
unsafe extern "C" {
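    // Raw LLVM intrinsic bindings that back the AVX-512F intrinsics defined above.
    // Trailing `rounding`/`sae` parameters carry the `_MM_FROUND_*` embedded-rounding
    // and suppress-all-exceptions controls.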
    #[link_name = "llvm.x86.avx512.sqrt.ps.512"]
    unsafe fn vsqrtps(a: f32x16, rounding: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.sqrt.pd.512"]
    unsafe fn vsqrtpd(a: f64x8, rounding: i32) -> f64x8;

    #[link_name = "llvm.x86.avx512.vfmadd.ps.512"]
    unsafe fn vfmadd132psround(a: __m512, b: __m512, c: __m512, rounding: i32) -> __m512;
    #[link_name = "llvm.x86.avx512.vfmadd.pd.512"]
    unsafe fn vfmadd132pdround(a: __m512d, b: __m512d, c: __m512d, rounding: i32) -> __m512d;

    #[link_name = "llvm.x86.avx512.vfmaddsub.ps.512"]
    unsafe fn vfmaddsubpsround(a: __m512, b: __m512, c: __m512, rounding: i32) -> __m512; // from clang
    #[link_name = "llvm.x86.avx512.vfmaddsub.pd.512"]
    unsafe fn vfmaddsubpdround(a: __m512d, b: __m512d, c: __m512d, rounding: i32) -> __m512d; // from clang

    #[link_name = "llvm.x86.avx512.add.ps.512"]
    unsafe fn vaddps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.add.pd.512"]
    unsafe fn vaddpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.sub.ps.512"]
    unsafe fn vsubps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.sub.pd.512"]
    unsafe fn vsubpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.mul.ps.512"]
    unsafe fn vmulps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.mul.pd.512"]
    unsafe fn vmulpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.div.ps.512"]
    unsafe fn vdivps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.div.pd.512"]
    unsafe fn vdivpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;

    #[link_name = "llvm.x86.avx512.max.ps.512"]
    unsafe fn vmaxps(a: f32x16, b: f32x16, sae: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.max.pd.512"]
    unsafe fn vmaxpd(a: f64x8, b: f64x8, sae: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.min.ps.512"]
    unsafe fn vminps(a: f32x16, b: f32x16, sae: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.min.pd.512"]
    unsafe fn vminpd(a: f64x8, b: f64x8, sae: i32) -> f64x8;

    #[link_name = "llvm.x86.avx512.mask.getexp.ps.512"]
    unsafe fn vgetexpps(a: f32x16, src: f32x16, m: u16, sae: i32) -> f32x16;

    #[link_name = "llvm.x86.avx512.mask.getexp.ps.256"]
    unsafe fn vgetexpps256(a: f32x8, src: f32x8, m: u8) -> f32x8;
    #[link_name = "llvm.x86.avx512.mask.getexp.ps.128"]
    unsafe fn vgetexpps128(a: f32x4, src: f32x4, m: u8) -> f32x4;

    #[link_name = "llvm.x86.avx512.mask.getexp.pd.512"]
    unsafe fn vgetexppd(a: f64x8, src: f64x8, m: u8, sae: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.mask.getexp.pd.256"]
    unsafe fn vgetexppd256(a: f64x4, src: f64x4, m: u8) -> f64x4;
    #[link_name = "llvm.x86.avx512.mask.getexp.pd.128"]
    unsafe fn vgetexppd128(a: f64x2, src: f64x2, m: u8) -> f64x2;

    #[link_name = "llvm.x86.avx512.mask.rndscale.ps.512"]
    unsafe fn vrndscaleps(a: f32x16, imm8: i32, src: f32x16, mask: u16, sae: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.mask.rndscale.ps.256"]
    unsafe fn vrndscaleps256(a: f32x8, imm8: i32, src: f32x8, mask: u8) -> f32x8;
    #[link_name = "llvm.x86.avx512.mask.rndscale.ps.128"]
    unsafe fn vrndscaleps128(a: f32x4, imm8: i32, src: f32x4, mask: u8) -> f32x4;

    #[link_name = "llvm.x86.avx512.mask.rndscale.pd.512"]
    unsafe fn vrndscalepd(a: f64x8, imm8: i32, src: f64x8, mask: u8, sae: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.mask.rndscale.pd.256"]
    unsafe fn vrndscalepd256(a: f64x4, imm8: i32, src: f64x4, mask: u8) -> f64x4;
    #[link_name = "llvm.x86.avx512.mask.rndscale.pd.128"]
    unsafe fn vrndscalepd128(a: f64x2, imm8: i32, src: f64x2, mask: u8) -> f64x2;

    #[link_name = "llvm.x86.avx512.mask.scalef.ps.512"]
    unsafe fn vscalefps(a: f32x16, b: f32x16, src: f32x16, mask: u16, rounding: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.mask.scalef.ps.256"]
    unsafe fn vscalefps256(a: f32x8, b: f32x8, src: f32x8, mask: u8) -> f32x8;
    #[link_name = "llvm.x86.avx512.mask.scalef.ps.128"]
    unsafe fn vscalefps128(a: f32x4, b: f32x4, src: f32x4, mask: u8) -> f32x4;

    #[link_name = "llvm.x86.avx512.mask.scalef.pd.512"]
    unsafe fn vscalefpd(a: f64x8, b: f64x8, src: f64x8, mask: u8, rounding: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.mask.scalef.pd.256"]
    unsafe fn vscalefpd256(a: f64x4, b: f64x4, src: f64x4, mask: u8) -> f64x4;
    #[link_name = "llvm.x86.avx512.mask.scalef.pd.128"]
    unsafe fn vscalefpd128(a: f64x2, b: f64x2, src: f64x2, mask: u8) -> f64x2;

    #[link_name = "llvm.x86.avx512.mask.fixupimm.ps.512"]
    unsafe fn vfixupimmps(a: f32x16, b: f32x16, c: i32x16, imm8: i32, mask: u16, sae: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.mask.fixupimm.ps.256"]
    unsafe fn vfixupimmps256(a: f32x8, b: f32x8, c: i32x8, imm8: i32, mask: u8) -> f32x8;
    #[link_name = "llvm.x86.avx512.mask.fixupimm.ps.128"]
    unsafe fn vfixupimmps128(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8) -> f32x4;

    #[link_name = "llvm.x86.avx512.mask.fixupimm.pd.512"]
    unsafe fn vfixupimmpd(a: f64x8, b: f64x8, c: i64x8, imm8: i32, mask: u8, sae: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.mask.fixupimm.pd.256"]
    unsafe fn vfixupimmpd256(a: f64x4, b: f64x4, c: i64x4, imm8: i32, mask: u8) -> f64x4;
    #[link_name = "llvm.x86.avx512.mask.fixupimm.pd.128"]
    unsafe fn vfixupimmpd128(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8) -> f64x2;

    #[link_name = "llvm.x86.avx512.maskz.fixupimm.ps.512"]
    unsafe fn vfixupimmpsz(a: f32x16, b: f32x16, c: i32x16, imm8: i32, mask: u16, sae: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.maskz.fixupimm.ps.256"]
    unsafe fn vfixupimmpsz256(a: f32x8, b: f32x8, c: i32x8, imm8: i32, mask: u8) -> f32x8;
    #[link_name = "llvm.x86.avx512.maskz.fixupimm.ps.128"]
    unsafe fn vfixupimmpsz128(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8) -> f32x4;

    #[link_name = "llvm.x86.avx512.maskz.fixupimm.pd.512"]
    unsafe fn vfixupimmpdz(a: f64x8, b: f64x8, c: i64x8, imm8: i32, mask: u8, sae: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.maskz.fixupimm.pd.256"]
    unsafe fn vfixupimmpdz256(a: f64x4, b: f64x4, c: i64x4, imm8: i32, mask: u8) -> f64x4;
    #[link_name = "llvm.x86.avx512.maskz.fixupimm.pd.128"]
    unsafe fn vfixupimmpdz128(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8) -> f64x2;

    #[link_name = "llvm.x86.avx512.pternlog.d.512"]
    unsafe fn vpternlogd(a: i32x16, b: i32x16, c: i32x16, imm8: i32) -> i32x16;
    #[link_name = "llvm.x86.avx512.pternlog.d.256"]
    unsafe fn vpternlogd256(a: i32x8, b: i32x8, c: i32x8, imm8: i32) -> i32x8;
    #[link_name = "llvm.x86.avx512.pternlog.d.128"]
    unsafe fn vpternlogd128(a: i32x4, b: i32x4, c: i32x4, imm8: i32) -> i32x4;

    #[link_name = "llvm.x86.avx512.pternlog.q.512"]
    unsafe fn vpternlogq(a: i64x8, b: i64x8, c: i64x8, imm8: i32) -> i64x8;
    #[link_name = "llvm.x86.avx512.pternlog.q.256"]
    unsafe fn vpternlogq256(a: i64x4, b: i64x4, c: i64x4, imm8: i32) -> i64x4;
    #[link_name = "llvm.x86.avx512.pternlog.q.128"]
    unsafe fn vpternlogq128(a: i64x2, b: i64x2, c: i64x2, imm8: i32) -> i64x2;

    #[link_name = "llvm.x86.avx512.mask.getmant.ps.512"]
    unsafe fn vgetmantps(a: f32x16, mantissas: i32, src: f32x16, m: u16, sae: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.mask.getmant.ps.256"]
    unsafe fn vgetmantps256(a: f32x8, mantissas: i32, src: f32x8, m: u8) -> f32x8;
    #[link_name = "llvm.x86.avx512.mask.getmant.ps.128"]
    unsafe fn vgetmantps128(a: f32x4, mantissas: i32, src: f32x4, m: u8) -> f32x4;

    #[link_name = "llvm.x86.avx512.mask.getmant.pd.512"]
    unsafe fn vgetmantpd(a: f64x8, mantissas: i32, src: f64x8, m: u8, sae: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.mask.getmant.pd.256"]
    unsafe fn vgetmantpd256(a: f64x4, mantissas: i32, src: f64x4, m: u8) -> f64x4;
    #[link_name = "llvm.x86.avx512.mask.getmant.pd.128"]
    unsafe fn vgetmantpd128(a: f64x2, mantissas: i32, src: f64x2, m: u8) -> f64x2;

    #[link_name = "llvm.x86.avx512.rcp14.ps.512"]
    unsafe fn vrcp14ps(a: f32x16, src: f32x16, m: u16) -> f32x16;
    #[link_name = "llvm.x86.avx512.rcp14.ps.256"]
    unsafe fn vrcp14ps256(a: f32x8, src: f32x8, m: u8) -> f32x8;
    #[link_name = "llvm.x86.avx512.rcp14.ps.128"]
    unsafe fn vrcp14ps128(a: f32x4, src: f32x4, m: u8) -> f32x4;

    #[link_name = "llvm.x86.avx512.rcp14.pd.512"]
    unsafe fn vrcp14pd(a: f64x8, src: f64x8, m: u8) -> f64x8;
    #[link_name = "llvm.x86.avx512.rcp14.pd.256"]
    unsafe fn vrcp14pd256(a: f64x4, src: f64x4, m: u8) -> f64x4;
    #[link_name = "llvm.x86.avx512.rcp14.pd.128"]
    unsafe fn vrcp14pd128(a: f64x2, src: f64x2, m: u8) -> f64x2;

    #[link_name = "llvm.x86.avx512.rsqrt14.ps.512"]
    unsafe fn vrsqrt14ps(a: f32x16, src: f32x16, m: u16) -> f32x16;
    #[link_name = "llvm.x86.avx512.rsqrt14.ps.256"]
    unsafe fn vrsqrt14ps256(a: f32x8, src: f32x8, m: u8) -> f32x8;
    #[link_name = "llvm.x86.avx512.rsqrt14.ps.128"]
    unsafe fn vrsqrt14ps128(a: f32x4, src: f32x4, m: u8) -> f32x4;

    #[link_name = "llvm.x86.avx512.rsqrt14.pd.512"]
    unsafe fn vrsqrt14pd(a: f64x8, src: f64x8, m: u8) -> f64x8;
    #[link_name = "llvm.x86.avx512.rsqrt14.pd.256"]
    unsafe fn vrsqrt14pd256(a: f64x4, src: f64x4, m: u8) -> f64x4;
    #[link_name = "llvm.x86.avx512.rsqrt14.pd.128"]
    unsafe fn vrsqrt14pd128(a: f64x2, src: f64x2, m: u8) -> f64x2;

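    // Vector float <-> integer conversions. The `mask.*` forms take a passthrough
    // `src` for lanes whose mask bit is clear; `rounding`/`sae` select the rounding
    // behaviour of the operation.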
    #[link_name = "llvm.x86.avx512.mask.cvtps2dq.512"]
    unsafe fn vcvtps2dq(a: f32x16, src: i32x16, mask: u16, rounding: i32) -> i32x16;

    #[link_name = "llvm.x86.avx512.mask.cvtps2udq.512"]
    unsafe fn vcvtps2udq(a: f32x16, src: u32x16, mask: u16, rounding: i32) -> u32x16;
    #[link_name = "llvm.x86.avx512.mask.cvtps2udq.256"]
    unsafe fn vcvtps2udq256(a: f32x8, src: u32x8, mask: u8) -> u32x8;
    #[link_name = "llvm.x86.avx512.mask.cvtps2udq.128"]
    unsafe fn vcvtps2udq128(a: f32x4, src: u32x4, mask: u8) -> u32x4;

    #[link_name = "llvm.x86.avx512.mask.cvtps2pd.512"]
    unsafe fn vcvtps2pd(a: f32x8, src: f64x8, mask: u8, sae: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.mask.cvtpd2ps.512"]
    unsafe fn vcvtpd2ps(a: f64x8, src: f32x8, mask: u8, rounding: i32) -> f32x8;

    #[link_name = "llvm.x86.avx512.mask.cvtpd2dq.512"]
    unsafe fn vcvtpd2dq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> i32x8;

    #[link_name = "llvm.x86.avx512.mask.cvtpd2udq.512"]
    unsafe fn vcvtpd2udq(a: f64x8, src: u32x8, mask: u8, rounding: i32) -> u32x8;
    #[link_name = "llvm.x86.avx512.mask.cvtpd2udq.256"]
    unsafe fn vcvtpd2udq256(a: f64x4, src: u32x4, mask: u8) -> u32x4;
    #[link_name = "llvm.x86.avx512.mask.cvtpd2udq.128"]
    unsafe fn vcvtpd2udq128(a: f64x2, src: u32x4, mask: u8) -> u32x4;

    #[link_name = "llvm.x86.avx512.sitofp.round.v16f32.v16i32"]
    unsafe fn vcvtdq2ps(a: i32x16, rounding: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.uitofp.round.v16f32.v16i32"]
    unsafe fn vcvtudq2ps(a: u32x16, rounding: i32) -> f32x16;

    #[link_name = "llvm.x86.avx512.mask.vcvtps2ph.512"]
    unsafe fn vcvtps2ph(a: f32x16, sae: i32, src: i16x16, mask: u16) -> i16x16;
    #[link_name = "llvm.x86.avx512.mask.vcvtps2ph.256"]
    unsafe fn vcvtps2ph256(a: f32x8, sae: i32, src: i16x8, mask: u8) -> i16x8;
    #[link_name = "llvm.x86.avx512.mask.vcvtps2ph.128"]
    unsafe fn vcvtps2ph128(a: f32x4, sae: i32, src: i16x8, mask: u8) -> i16x8;

    #[link_name = "llvm.x86.avx512.mask.vcvtph2ps.512"]
    unsafe fn vcvtph2ps(a: i16x16, src: f32x16, mask: u16, sae: i32) -> f32x16;

    #[link_name = "llvm.x86.avx512.mask.cvttps2dq.512"]
    unsafe fn vcvttps2dq(a: f32x16, src: i32x16, mask: u16, rounding: i32) -> i32x16;
    #[link_name = "llvm.x86.avx512.mask.cvttps2dq.256"]
    unsafe fn vcvttps2dq256(a: f32x8, src: i32x8, mask: u8) -> i32x8;
    #[link_name = "llvm.x86.avx512.mask.cvttps2dq.128"]
    unsafe fn vcvttps2dq128(a: f32x4, src: i32x4, mask: u8) -> i32x4;

    #[link_name = "llvm.x86.avx512.mask.cvttps2udq.512"]
    unsafe fn vcvttps2udq(a: f32x16, src: u32x16, mask: u16, rounding: i32) -> u32x16;
    #[link_name = "llvm.x86.avx512.mask.cvttps2udq.256"]
    unsafe fn vcvttps2udq256(a: f32x8, src: u32x8, mask: u8) -> u32x8;
    #[link_name = "llvm.x86.avx512.mask.cvttps2udq.128"]
    unsafe fn vcvttps2udq128(a: f32x4, src: u32x4, mask: u8) -> u32x4;

    #[link_name = "llvm.x86.avx512.mask.cvttpd2dq.512"]
    unsafe fn vcvttpd2dq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> i32x8;
    #[link_name = "llvm.x86.avx512.mask.cvttpd2dq.256"]
    unsafe fn vcvttpd2dq256(a: f64x4, src: i32x4, mask: u8) -> i32x4;
    #[link_name = "llvm.x86.avx512.mask.cvttpd2dq.128"]
    unsafe fn vcvttpd2dq128(a: f64x2, src: i32x4, mask: u8) -> i32x4;

    #[link_name = "llvm.x86.avx512.mask.cvttpd2udq.512"]
    unsafe fn vcvttpd2udq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> u32x8;
    #[link_name = "llvm.x86.avx512.mask.cvttpd2udq.256"]
    unsafe fn vcvttpd2udq256(a: f64x4, src: i32x4, mask: u8) -> u32x4;
    #[link_name = "llvm.x86.avx512.mask.cvttpd2udq.128"]
    unsafe fn vcvttpd2udq128(a: f64x2, src: i32x4, mask: u8) -> u32x4;

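    // Integer down-conversions (`vpmov*`): plain `pmov` truncates, `pmovs` saturates
    // as signed and `pmovus` saturates as unsigned; the `*mem` variants store the
    // narrowed elements directly to memory.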
    #[link_name = "llvm.x86.avx512.mask.pmov.dw.128"]
    unsafe fn vpmovdw128(a: i32x4, src: i16x8, mask: u8) -> i16x8;
    #[link_name = "llvm.x86.avx512.mask.pmov.db.256"]
    unsafe fn vpmovdb256(a: i32x8, src: i8x16, mask: u8) -> i8x16;
    #[link_name = "llvm.x86.avx512.mask.pmov.db.128"]
    unsafe fn vpmovdb128(a: i32x4, src: i8x16, mask: u8) -> i8x16;

    #[link_name = "llvm.x86.avx512.mask.pmov.qw.256"]
    unsafe fn vpmovqw256(a: i64x4, src: i16x8, mask: u8) -> i16x8;
    #[link_name = "llvm.x86.avx512.mask.pmov.qw.128"]
    unsafe fn vpmovqw128(a: i64x2, src: i16x8, mask: u8) -> i16x8;
    #[link_name = "llvm.x86.avx512.mask.pmov.qb.256"]
    unsafe fn vpmovqb256(a: i64x4, src: i8x16, mask: u8) -> i8x16;
    #[link_name = "llvm.x86.avx512.mask.pmov.qb.128"]
    unsafe fn vpmovqb128(a: i64x2, src: i8x16, mask: u8) -> i8x16;
    #[link_name = "llvm.x86.avx512.mask.pmov.qd.128"]
    unsafe fn vpmovqd128(a: i64x2, src: i32x4, mask: u8) -> i32x4;

    #[link_name = "llvm.x86.avx512.mask.pmov.dw.mem.512"]
    unsafe fn vpmovdwmem(mem_addr: *mut i8, a: i32x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.pmov.dw.mem.256"]
    unsafe fn vpmovdwmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmov.dw.mem.128"]
    unsafe fn vpmovdwmem128(mem_addr: *mut i8, a: i32x4, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.mem.512"]
    unsafe fn vpmovsdwmem(mem_addr: *mut i8, a: i32x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.mem.256"]
    unsafe fn vpmovsdwmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.mem.128"]
    unsafe fn vpmovsdwmem128(mem_addr: *mut i8, a: i32x4, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.mem.512"]
    unsafe fn vpmovusdwmem(mem_addr: *mut i8, a: i32x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.mem.256"]
    unsafe fn vpmovusdwmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.mem.128"]
    unsafe fn vpmovusdwmem128(mem_addr: *mut i8, a: i32x4, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmov.db.mem.512"]
    unsafe fn vpmovdbmem(mem_addr: *mut i8, a: i32x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.pmov.db.mem.256"]
    unsafe fn vpmovdbmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmov.db.mem.128"]
    unsafe fn vpmovdbmem128(mem_addr: *mut i8, a: i32x4, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmovs.db.mem.512"]
    unsafe fn vpmovsdbmem(mem_addr: *mut i8, a: i32x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.pmovs.db.mem.256"]
    unsafe fn vpmovsdbmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovs.db.mem.128"]
    unsafe fn vpmovsdbmem128(mem_addr: *mut i8, a: i32x4, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmovus.db.mem.512"]
    unsafe fn vpmovusdbmem(mem_addr: *mut i8, a: i32x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.pmovus.db.mem.256"]
    unsafe fn vpmovusdbmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovus.db.mem.128"]
    unsafe fn vpmovusdbmem128(mem_addr: *mut i8, a: i32x4, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmov.qw.mem.512"]
    unsafe fn vpmovqwmem(mem_addr: *mut i8, a: i64x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmov.qw.mem.256"]
    unsafe fn vpmovqwmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmov.qw.mem.128"]
    unsafe fn vpmovqwmem128(mem_addr: *mut i8, a: i64x2, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.mem.512"]
    unsafe fn vpmovsqwmem(mem_addr: *mut i8, a: i64x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.mem.256"]
    unsafe fn vpmovsqwmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.mem.128"]
    unsafe fn vpmovsqwmem128(mem_addr: *mut i8, a: i64x2, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.mem.512"]
    unsafe fn vpmovusqwmem(mem_addr: *mut i8, a: i64x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.mem.256"]
    unsafe fn vpmovusqwmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.mem.128"]
    unsafe fn vpmovusqwmem128(mem_addr: *mut i8, a: i64x2, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmov.qb.mem.512"]
    unsafe fn vpmovqbmem(mem_addr: *mut i8, a: i64x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmov.qb.mem.256"]
    unsafe fn vpmovqbmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmov.qb.mem.128"]
    unsafe fn vpmovqbmem128(mem_addr: *mut i8, a: i64x2, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.mem.512"]
    unsafe fn vpmovsqbmem(mem_addr: *mut i8, a: i64x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.mem.256"]
    unsafe fn vpmovsqbmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.mem.128"]
    unsafe fn vpmovsqbmem128(mem_addr: *mut i8, a: i64x2, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.mem.512"]
    unsafe fn vpmovusqbmem(mem_addr: *mut i8, a: i64x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.mem.256"]
    unsafe fn vpmovusqbmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.mem.128"]
    unsafe fn vpmovusqbmem128(mem_addr: *mut i8, a: i64x2, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmov.qd.mem.512"]
    unsafe fn vpmovqdmem(mem_addr: *mut i8, a: i64x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmov.qd.mem.256"]
    unsafe fn vpmovqdmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmov.qd.mem.128"]
    unsafe fn vpmovqdmem128(mem_addr: *mut i8, a: i64x2, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.mem.512"]
    unsafe fn vpmovsqdmem(mem_addr: *mut i8, a: i64x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.mem.256"]
    unsafe fn vpmovsqdmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.mem.128"]
    unsafe fn vpmovsqdmem128(mem_addr: *mut i8, a: i64x2, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.mem.512"]
    unsafe fn vpmovusqdmem(mem_addr: *mut i8, a: i64x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.mem.256"]
    unsafe fn vpmovusqdmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.mem.128"]
    unsafe fn vpmovusqdmem128(mem_addr: *mut i8, a: i64x2, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmov.qb.512"]
    unsafe fn vpmovqb(a: i64x8, src: i8x16, mask: u8) -> i8x16;

    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.512"]
    unsafe fn vpmovsdw(a: i32x16, src: i16x16, mask: u16) -> i16x16;
    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.256"]
    unsafe fn vpmovsdw256(a: i32x8, src: i16x8, mask: u8) -> i16x8;
    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.128"]
    unsafe fn vpmovsdw128(a: i32x4, src: i16x8, mask: u8) -> i16x8;

    #[link_name = "llvm.x86.avx512.mask.pmovs.db.512"]
    unsafe fn vpmovsdb(a: i32x16, src: i8x16, mask: u16) -> i8x16;
    #[link_name = "llvm.x86.avx512.mask.pmovs.db.256"]
    unsafe fn vpmovsdb256(a: i32x8, src: i8x16, mask: u8) -> i8x16;
    #[link_name = "llvm.x86.avx512.mask.pmovs.db.128"]
    unsafe fn vpmovsdb128(a: i32x4, src: i8x16, mask: u8) -> i8x16;

    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.512"]
    unsafe fn vpmovsqd(a: i64x8, src: i32x8, mask: u8) -> i32x8;
    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.256"]
    unsafe fn vpmovsqd256(a: i64x4, src: i32x4, mask: u8) -> i32x4;
    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.128"]
    unsafe fn vpmovsqd128(a: i64x2, src: i32x4, mask: u8) -> i32x4;

    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.512"]
    unsafe fn vpmovsqw(a: i64x8, src: i16x8, mask: u8) -> i16x8;
    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.256"]
    unsafe fn vpmovsqw256(a: i64x4, src: i16x8, mask: u8) -> i16x8;
    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.128"]
    unsafe fn vpmovsqw128(a: i64x2, src: i16x8, mask: u8) -> i16x8;

    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.512"]
    unsafe fn vpmovsqb(a: i64x8, src: i8x16, mask: u8) -> i8x16;
    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.256"]
    unsafe fn vpmovsqb256(a: i64x4, src: i8x16, mask: u8) -> i8x16;
    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.128"]
    unsafe fn vpmovsqb128(a: i64x2, src: i8x16, mask: u8) -> i8x16;

    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.512"]
    unsafe fn vpmovusdw(a: u32x16, src: u16x16, mask: u16) -> u16x16;
    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.256"]
    unsafe fn vpmovusdw256(a: u32x8, src: u16x8, mask: u8) -> u16x8;
    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.128"]
    unsafe fn vpmovusdw128(a: u32x4, src: u16x8, mask: u8) -> u16x8;

    #[link_name = "llvm.x86.avx512.mask.pmovus.db.512"]
    unsafe fn vpmovusdb(a: u32x16, src: u8x16, mask: u16) -> u8x16;
    #[link_name = "llvm.x86.avx512.mask.pmovus.db.256"]
    unsafe fn vpmovusdb256(a: u32x8, src: u8x16, mask: u8) -> u8x16;
    #[link_name = "llvm.x86.avx512.mask.pmovus.db.128"]
    unsafe fn vpmovusdb128(a: u32x4, src: u8x16, mask: u8) -> u8x16;

    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.512"]
    unsafe fn vpmovusqd(a: u64x8, src: u32x8, mask: u8) -> u32x8;
    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.256"]
    unsafe fn vpmovusqd256(a: u64x4, src: u32x4, mask: u8) -> u32x4;
    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.128"]
    unsafe fn vpmovusqd128(a: u64x2, src: u32x4, mask: u8) -> u32x4;

    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.512"]
    unsafe fn vpmovusqw(a: u64x8, src: u16x8, mask: u8) -> u16x8;
    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.256"]
    unsafe fn vpmovusqw256(a: u64x4, src: u16x8, mask: u8) -> u16x8;
    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.128"]
    unsafe fn vpmovusqw128(a: u64x2, src: u16x8, mask: u8) -> u16x8;

    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.512"]
    unsafe fn vpmovusqb(a: u64x8, src: u8x16, mask: u8) -> u8x16;
    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.256"]
    unsafe fn vpmovusqb256(a: u64x4, src: u8x16, mask: u8) -> u8x16;
    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.128"]
    unsafe fn vpmovusqb128(a: u64x2, src: u8x16, mask: u8) -> u8x16;

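    // Gathers and scatters: each active lane (mask bit set) accesses memory at
    // `slice + offset * scale`; inactive lanes are not accessed.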
    #[link_name = "llvm.x86.avx512.gather.dpd.512"]
    unsafe fn vgatherdpd(src: f64x8, slice: *const i8, offsets: i32x8, mask: i8, scale: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.gather.dps.512"]
    unsafe fn vgatherdps(src: f32x16, slice: *const i8, offsets: i32x16, mask: i16, scale: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.gather.qpd.512"]
    unsafe fn vgatherqpd(src: f64x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.gather.qps.512"]
    unsafe fn vgatherqps(src: f32x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> f32x8;
    #[link_name = "llvm.x86.avx512.gather.dpq.512"]
    unsafe fn vpgatherdq(src: i64x8, slice: *const i8, offsets: i32x8, mask: i8, scale: i32) -> i64x8;
    #[link_name = "llvm.x86.avx512.gather.dpi.512"]
    unsafe fn vpgatherdd(src: i32x16, slice: *const i8, offsets: i32x16, mask: i16, scale: i32) -> i32x16;
    #[link_name = "llvm.x86.avx512.gather.qpq.512"]
    unsafe fn vpgatherqq(src: i64x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> i64x8;
    #[link_name = "llvm.x86.avx512.gather.qpi.512"]
    unsafe fn vpgatherqd(src: i32x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> i32x8;

    #[link_name = "llvm.x86.avx512.scatter.dpd.512"]
    unsafe fn vscatterdpd(slice: *mut i8, mask: i8, offsets: i32x8, src: f64x8, scale: i32);
    #[link_name = "llvm.x86.avx512.scatter.dps.512"]
    unsafe fn vscatterdps(slice: *mut i8, mask: i16, offsets: i32x16, src: f32x16, scale: i32);
    #[link_name = "llvm.x86.avx512.scatter.qpd.512"]
    unsafe fn vscatterqpd(slice: *mut i8, mask: i8, offsets: i64x8, src: f64x8, scale: i32);
    #[link_name = "llvm.x86.avx512.scatter.qps.512"]
    unsafe fn vscatterqps(slice: *mut i8, mask: i8, offsets: i64x8, src: f32x8, scale: i32);
    #[link_name = "llvm.x86.avx512.scatter.dpq.512"]
    unsafe fn vpscatterdq(slice: *mut i8, mask: i8, offsets: i32x8, src: i64x8, scale: i32);

    #[link_name = "llvm.x86.avx512.scatter.dpi.512"]
    unsafe fn vpscatterdd(slice: *mut i8, mask: i16, offsets: i32x16, src: i32x16, scale: i32);
    #[link_name = "llvm.x86.avx512.scatter.qpq.512"]
    unsafe fn vpscatterqq(slice: *mut i8, mask: i8, offsets: i64x8, src: i64x8, scale: i32);
    #[link_name = "llvm.x86.avx512.scatter.qpi.512"]
    unsafe fn vpscatterqd(slice: *mut i8, mask: i8, offsets: i64x8, src: i32x8, scale: i32);

    #[link_name = "llvm.x86.avx512.scattersiv4.si"]
    unsafe fn vpscatterdd_128(slice: *mut i8, k: u8, offsets: i32x4, src: i32x4, scale: i32);
    #[link_name = "llvm.x86.avx512.scattersiv2.di"]
    unsafe fn vpscatterdq_128(slice: *mut i8, k: u8, offsets: i32x4, src: i64x2, scale: i32);
    #[link_name = "llvm.x86.avx512.scattersiv2.df"]
    unsafe fn vscatterdpd_128(slice: *mut i8, k: u8, offsets: i32x4, src: f64x2, scale: i32);
    #[link_name = "llvm.x86.avx512.scattersiv4.sf"]
    unsafe fn vscatterdps_128(slice: *mut i8, k: u8, offsets: i32x4, src: f32x4, scale: i32);
    #[link_name = "llvm.x86.avx512.scatterdiv4.si"]
    unsafe fn vpscatterqd_128(slice: *mut i8, k: u8, offsets: i64x2, src: i32x4, scale: i32);
    #[link_name = "llvm.x86.avx512.scatterdiv2.di"]
    unsafe fn vpscatterqq_128(slice: *mut i8, k: u8, offsets: i64x2, src: i64x2, scale: i32);
    #[link_name = "llvm.x86.avx512.scatterdiv2.df"]
    unsafe fn vscatterqpd_128(slice: *mut i8, k: u8, offsets: i64x2, src: f64x2, scale: i32);
    #[link_name = "llvm.x86.avx512.scatterdiv4.sf"]
    unsafe fn vscatterqps_128(slice: *mut i8, k: u8, offsets: i64x2, src: f32x4, scale: i32);

    #[link_name = "llvm.x86.avx512.scattersiv8.si"]
    unsafe fn vpscatterdd_256(slice: *mut i8, k: u8, offsets: i32x8, src: i32x8, scale: i32);
    #[link_name = "llvm.x86.avx512.scattersiv4.di"]
    unsafe fn vpscatterdq_256(slice: *mut i8, k: u8, offsets: i32x4, src: i64x4, scale: i32);
    #[link_name = "llvm.x86.avx512.scattersiv4.df"]
    unsafe fn vscatterdpd_256(slice: *mut i8, k: u8, offsets: i32x4, src: f64x4, scale: i32);
    #[link_name = "llvm.x86.avx512.scattersiv8.sf"]
    unsafe fn vscatterdps_256(slice: *mut i8, k: u8, offsets: i32x8, src: f32x8, scale: i32);
    #[link_name = "llvm.x86.avx512.scatterdiv8.si"]
    unsafe fn vpscatterqd_256(slice: *mut i8, k: u8, offsets: i64x4, src: i32x4, scale: i32);
    #[link_name = "llvm.x86.avx512.scatterdiv4.di"]
    unsafe fn vpscatterqq_256(slice: *mut i8, k: u8, offsets: i64x4, src: i64x4, scale: i32);
    #[link_name = "llvm.x86.avx512.scatterdiv4.df"]
    unsafe fn vscatterqpd_256(slice: *mut i8, k: u8, offsets: i64x4, src: f64x4, scale: i32);
    #[link_name = "llvm.x86.avx512.scatterdiv8.sf"]
    unsafe fn vscatterqps_256(slice: *mut i8, k: u8, offsets: i64x4, src: f32x4, scale: i32);

    #[link_name = "llvm.x86.avx512.gather3siv4.si"]
    unsafe fn vpgatherdd_128(src: i32x4, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> i32x4;
    #[link_name = "llvm.x86.avx512.gather3siv2.di"]
    unsafe fn vpgatherdq_128(src: i64x2, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> i64x2;
    #[link_name = "llvm.x86.avx512.gather3siv2.df"]
    unsafe fn vgatherdpd_128(src: f64x2, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> f64x2;
    #[link_name = "llvm.x86.avx512.gather3siv4.sf"]
    unsafe fn vgatherdps_128(src: f32x4, slice: *const u8, offsets: i32x4, k: u8, scale: i32) -> f32x4;
    #[link_name = "llvm.x86.avx512.gather3div4.si"]
    unsafe fn vpgatherqd_128(src: i32x4, slice: *const u8, offsets: i64x2, k: u8, scale: i32) -> i32x4;
    #[link_name = "llvm.x86.avx512.gather3div2.di"]
    unsafe fn vpgatherqq_128(src: i64x2, slice: *const i8, offsets: i64x2, k: u8, scale: i32) -> i64x2;
    #[link_name = "llvm.x86.avx512.gather3div2.df"]
    unsafe fn vgatherqpd_128(src: f64x2, slice: *const i8, offsets: i64x2, k: u8, scale: i32) -> f64x2;
    #[link_name = "llvm.x86.avx512.gather3div4.sf"]
    unsafe fn vgatherqps_128(src: f32x4, slice: *const i8, offsets: i64x2, k: u8, scale: i32) -> f32x4;

    #[link_name = "llvm.x86.avx512.gather3siv8.si"]
    unsafe fn vpgatherdd_256(src: i32x8, slice: *const i8, offsets: i32x8, k: u8, scale: i32) -> i32x8;
    #[link_name = "llvm.x86.avx512.gather3siv4.di"]
    unsafe fn vpgatherdq_256(src: i64x4, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> i64x4;
    #[link_name = "llvm.x86.avx512.gather3siv4.df"]
    unsafe fn vgatherdpd_256(src: f64x4, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> f64x4;
    #[link_name = "llvm.x86.avx512.gather3siv8.sf"]
    unsafe fn vgatherdps_256(src: f32x8, slice: *const i8, offsets: i32x8, k: u8, scale: i32) -> f32x8;
    #[link_name = "llvm.x86.avx512.gather3div8.si"]
    unsafe fn vpgatherqd_256(src: i32x4, slice: *const i8, offsets: i64x4, k: u8, scale: i32) -> i32x4;
    #[link_name = "llvm.x86.avx512.gather3div4.di"]
    unsafe fn vpgatherqq_256(src: i64x4, slice: *const i8, offsets: i64x4, k: u8, scale: i32) -> i64x4;
    #[link_name = "llvm.x86.avx512.gather3div4.df"]
    unsafe fn vgatherqpd_256(src: f64x4, slice: *const i8, offsets: i64x4, k: u8, scale: i32) -> f64x4;
    #[link_name = "llvm.x86.avx512.gather3div8.sf"]
    unsafe fn vgatherqps_256(src: f32x4, slice: *const i8, offsets: i64x4, k: u8, scale: i32) -> f32x4;

    #[link_name = "llvm.x86.avx512.mask.cmp.ss"]
    unsafe fn vcmpss(a: __m128, b: __m128, op: i32, m: i8, sae: i32) -> i8;
    #[link_name = "llvm.x86.avx512.mask.cmp.sd"]
    unsafe fn vcmpsd(a: __m128d, b: __m128d, op: i32, m: i8, sae: i32) -> i8;

    #[link_name = "llvm.x86.avx512.mask.cmp.ps.512"]
    unsafe fn vcmpps(a: f32x16, b: f32x16, op: i32, m: i16, sae: i32) -> i16;
    #[link_name = "llvm.x86.avx512.mask.cmp.ps.256"]
    unsafe fn vcmpps256(a: f32x8, b: f32x8, op: i32, m: i8) -> i8;
    #[link_name = "llvm.x86.avx512.mask.cmp.ps.128"]
    unsafe fn vcmpps128(a: f32x4, b: f32x4, op: i32, m: i8) -> i8;

    #[link_name = "llvm.x86.avx512.mask.cmp.pd.512"]
    unsafe fn vcmppd(a: f64x8, b: f64x8, op: i32, m: i8, sae: i32) -> i8;
    #[link_name = "llvm.x86.avx512.mask.cmp.pd.256"]
    unsafe fn vcmppd256(a: f64x4, b: f64x4, op: i32, m: i8) -> i8;
    #[link_name = "llvm.x86.avx512.mask.cmp.pd.128"]
    unsafe fn vcmppd128(a: f64x2, b: f64x2, op: i32, m: i8) -> i8;

    #[link_name = "llvm.x86.avx512.mask.prol.d.512"]
    unsafe fn vprold(a: i32x16, i8: i32) -> i32x16;
    #[link_name = "llvm.x86.avx512.mask.prol.d.256"]
    unsafe fn vprold256(a: i32x8, i8: i32) -> i32x8;
    #[link_name = "llvm.x86.avx512.mask.prol.d.128"]
    unsafe fn vprold128(a: i32x4, i8: i32) -> i32x4;

    #[link_name = "llvm.x86.avx512.mask.pror.d.512"]
    unsafe fn vprord(a: i32x16, i8: i32) -> i32x16;
    #[link_name = "llvm.x86.avx512.mask.pror.d.256"]
    unsafe fn vprord256(a: i32x8, i8: i32) -> i32x8;
    #[link_name = "llvm.x86.avx512.mask.pror.d.128"]
    unsafe fn vprord128(a: i32x4, i8: i32) -> i32x4;

    #[link_name = "llvm.x86.avx512.mask.prol.q.512"]
    unsafe fn vprolq(a: i64x8, i8: i32) -> i64x8;
    #[link_name = "llvm.x86.avx512.mask.prol.q.256"]
    unsafe fn vprolq256(a: i64x4, i8: i32) -> i64x4;
    #[link_name = "llvm.x86.avx512.mask.prol.q.128"]
    unsafe fn vprolq128(a: i64x2, i8: i32) -> i64x2;

    #[link_name = "llvm.x86.avx512.mask.pror.q.512"]
    unsafe fn vprorq(a: i64x8, i8: i32) -> i64x8;
    #[link_name = "llvm.x86.avx512.mask.pror.q.256"]
    unsafe fn vprorq256(a: i64x4, i8: i32) -> i64x4;
    #[link_name = "llvm.x86.avx512.mask.pror.q.128"]
    unsafe fn vprorq128(a: i64x2, i8: i32) -> i64x2;

    #[link_name = "llvm.x86.avx512.mask.prolv.d.512"]
    unsafe fn vprolvd(a: i32x16, b: i32x16) -> i32x16;
    #[link_name = "llvm.x86.avx512.mask.prolv.d.256"]
    unsafe fn vprolvd256(a: i32x8, b: i32x8) -> i32x8;
    #[link_name = "llvm.x86.avx512.mask.prolv.d.128"]
    unsafe fn vprolvd128(a: i32x4, b: i32x4) -> i32x4;

    #[link_name = "llvm.x86.avx512.mask.prorv.d.512"]
    unsafe fn vprorvd(a: i32x16, b: i32x16) -> i32x16;
    #[link_name = "llvm.x86.avx512.mask.prorv.d.256"]
    unsafe fn vprorvd256(a: i32x8, b: i32x8) -> i32x8;
    #[link_name = "llvm.x86.avx512.mask.prorv.d.128"]
    unsafe fn vprorvd128(a: i32x4, b: i32x4) -> i32x4;

    #[link_name = "llvm.x86.avx512.mask.prolv.q.512"]
    unsafe fn vprolvq(a: i64x8, b: i64x8) -> i64x8;
    #[link_name = "llvm.x86.avx512.mask.prolv.q.256"]
    unsafe fn vprolvq256(a: i64x4, b: i64x4) -> i64x4;
    #[link_name = "llvm.x86.avx512.mask.prolv.q.128"]
    unsafe fn vprolvq128(a: i64x2, b: i64x2) -> i64x2;

    #[link_name = "llvm.x86.avx512.mask.prorv.q.512"]
    unsafe fn vprorvq(a: i64x8, b: i64x8) -> i64x8;
    #[link_name = "llvm.x86.avx512.mask.prorv.q.256"]
    unsafe fn vprorvq256(a: i64x4, b: i64x4) -> i64x4;
    #[link_name = "llvm.x86.avx512.mask.prorv.q.128"]
    unsafe fn vprorvq128(a: i64x2, b: i64x2) -> i64x2;

    #[link_name = "llvm.x86.avx512.psllv.d.512"]
    unsafe fn vpsllvd(a: i32x16, b: i32x16) -> i32x16;
    #[link_name = "llvm.x86.avx512.psrlv.d.512"]
    unsafe fn vpsrlvd(a: i32x16, b: i32x16) -> i32x16;
    #[link_name = "llvm.x86.avx512.psllv.q.512"]
    unsafe fn vpsllvq(a: i64x8, b: i64x8) -> i64x8;
    #[link_name = "llvm.x86.avx512.psrlv.q.512"]
    unsafe fn vpsrlvq(a: i64x8, b: i64x8) -> i64x8;

    #[link_name = "llvm.x86.avx512.psll.d.512"]
    unsafe fn vpslld(a: i32x16, count: i32x4) -> i32x16;
    #[link_name = "llvm.x86.avx512.psrl.d.512"]
    unsafe fn vpsrld(a: i32x16, count: i32x4) -> i32x16;
    #[link_name = "llvm.x86.avx512.psll.q.512"]
    unsafe fn vpsllq(a: i64x8, count: i64x2) -> i64x8;
    #[link_name = "llvm.x86.avx512.psrl.q.512"]
    unsafe fn vpsrlq(a: i64x8, count: i64x2) -> i64x8;

    #[link_name = "llvm.x86.avx512.psra.d.512"]
    unsafe fn vpsrad(a: i32x16, count: i32x4) -> i32x16;

    #[link_name = "llvm.x86.avx512.psra.q.512"]
    unsafe fn vpsraq(a: i64x8, count: i64x2) -> i64x8;
    #[link_name = "llvm.x86.avx512.psra.q.256"]
    unsafe fn vpsraq256(a: i64x4, count: i64x2) -> i64x4;
    #[link_name = "llvm.x86.avx512.psra.q.128"]
    unsafe fn vpsraq128(a: i64x2, count: i64x2) -> i64x2;

    #[link_name = "llvm.x86.avx512.psrav.d.512"]
    unsafe fn vpsravd(a: i32x16, count: i32x16) -> i32x16;

    #[link_name = "llvm.x86.avx512.psrav.q.512"]
    unsafe fn vpsravq(a: i64x8, count: i64x8) -> i64x8;
    #[link_name = "llvm.x86.avx512.psrav.q.256"]
    unsafe fn vpsravq256(a: i64x4, count: i64x4) -> i64x4;
    #[link_name = "llvm.x86.avx512.psrav.q.128"]
    unsafe fn vpsravq128(a: i64x2, count: i64x2) -> i64x2;

    #[link_name = "llvm.x86.avx512.vpermilvar.ps.512"]
    unsafe fn vpermilps(a: f32x16, b: i32x16) -> f32x16;
    #[link_name = "llvm.x86.avx512.vpermilvar.pd.512"]
    unsafe fn vpermilpd(a: f64x8, b: i64x8) -> f64x8;

    #[link_name = "llvm.x86.avx512.permvar.si.512"]
    unsafe fn vpermd(a: i32x16, idx: i32x16) -> i32x16;

    #[link_name = "llvm.x86.avx512.permvar.di.512"]
    unsafe fn vpermq(a: i64x8, idx: i64x8) -> i64x8;
    #[link_name = "llvm.x86.avx512.permvar.di.256"]
    unsafe fn vpermq256(a: i64x4, idx: i64x4) -> i64x4;

    #[link_name = "llvm.x86.avx512.permvar.sf.512"]
    unsafe fn vpermps(a: f32x16, idx: i32x16) -> f32x16;

    #[link_name = "llvm.x86.avx512.permvar.df.512"]
    unsafe fn vpermpd(a: f64x8, idx: i64x8) -> f64x8;
    #[link_name = "llvm.x86.avx512.permvar.df.256"]
    unsafe fn vpermpd256(a: f64x4, idx: i64x4) -> f64x4;

    #[link_name = "llvm.x86.avx512.vpermi2var.d.512"]
    unsafe fn vpermi2d(a: i32x16, idx: i32x16, b: i32x16) -> i32x16;
    #[link_name = "llvm.x86.avx512.vpermi2var.d.256"]
    unsafe fn vpermi2d256(a: i32x8, idx: i32x8, b: i32x8) -> i32x8;
    #[link_name = "llvm.x86.avx512.vpermi2var.d.128"]
    unsafe fn vpermi2d128(a: i32x4, idx: i32x4, b: i32x4) -> i32x4;

    #[link_name = "llvm.x86.avx512.vpermi2var.q.512"]
    unsafe fn vpermi2q(a: i64x8, idx: i64x8, b: i64x8) -> i64x8;
    #[link_name = "llvm.x86.avx512.vpermi2var.q.256"]
    unsafe fn vpermi2q256(a: i64x4, idx: i64x4, b: i64x4) -> i64x4;
    #[link_name = "llvm.x86.avx512.vpermi2var.q.128"]
    unsafe fn vpermi2q128(a: i64x2, idx: i64x2, b: i64x2) -> i64x2;

    #[link_name = "llvm.x86.avx512.vpermi2var.ps.512"]
    unsafe fn vpermi2ps(a: f32x16, idx: i32x16, b: f32x16) -> f32x16;
    #[link_name = "llvm.x86.avx512.vpermi2var.ps.256"]
    unsafe fn vpermi2ps256(a: f32x8, idx: i32x8, b: f32x8) -> f32x8;
    #[link_name = "llvm.x86.avx512.vpermi2var.ps.128"]
    unsafe fn vpermi2ps128(a: f32x4, idx: i32x4, b: f32x4) -> f32x4;

    #[link_name = "llvm.x86.avx512.vpermi2var.pd.512"]
    unsafe fn vpermi2pd(a: f64x8, idx: i64x8, b: f64x8) -> f64x8;
    #[link_name = "llvm.x86.avx512.vpermi2var.pd.256"]
    unsafe fn vpermi2pd256(a: f64x4, idx: i64x4, b: f64x4) -> f64x4;
    #[link_name = "llvm.x86.avx512.vpermi2var.pd.128"]
    unsafe fn vpermi2pd128(a: f64x2, idx: i64x2, b: f64x2) -> f64x2;

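    // Compress packs the mask-selected elements contiguously into the low lanes
    // (the `compress.store` forms write them straight to memory); expand performs
    // the inverse, placing consecutive source elements into the mask-selected lanes.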
    #[link_name = "llvm.x86.avx512.mask.compress.d.512"]
    unsafe fn vpcompressd(a: i32x16, src: i32x16, mask: u16) -> i32x16;
    #[link_name = "llvm.x86.avx512.mask.compress.d.256"]
    unsafe fn vpcompressd256(a: i32x8, src: i32x8, mask: u8) -> i32x8;
    #[link_name = "llvm.x86.avx512.mask.compress.d.128"]
    unsafe fn vpcompressd128(a: i32x4, src: i32x4, mask: u8) -> i32x4;

    #[link_name = "llvm.x86.avx512.mask.compress.q.512"]
    unsafe fn vpcompressq(a: i64x8, src: i64x8, mask: u8) -> i64x8;
    #[link_name = "llvm.x86.avx512.mask.compress.q.256"]
    unsafe fn vpcompressq256(a: i64x4, src: i64x4, mask: u8) -> i64x4;
    #[link_name = "llvm.x86.avx512.mask.compress.q.128"]
    unsafe fn vpcompressq128(a: i64x2, src: i64x2, mask: u8) -> i64x2;

    #[link_name = "llvm.x86.avx512.mask.compress.ps.512"]
    unsafe fn vcompressps(a: f32x16, src: f32x16, mask: u16) -> f32x16;
    #[link_name = "llvm.x86.avx512.mask.compress.ps.256"]
    unsafe fn vcompressps256(a: f32x8, src: f32x8, mask: u8) -> f32x8;
    #[link_name = "llvm.x86.avx512.mask.compress.ps.128"]
    unsafe fn vcompressps128(a: f32x4, src: f32x4, mask: u8) -> f32x4;

    #[link_name = "llvm.x86.avx512.mask.compress.pd.512"]
    unsafe fn vcompresspd(a: f64x8, src: f64x8, mask: u8) -> f64x8;
    #[link_name = "llvm.x86.avx512.mask.compress.pd.256"]
    unsafe fn vcompresspd256(a: f64x4, src: f64x4, mask: u8) -> f64x4;
    #[link_name = "llvm.x86.avx512.mask.compress.pd.128"]
    unsafe fn vcompresspd128(a: f64x2, src: f64x2, mask: u8) -> f64x2;

    #[link_name = "llvm.x86.avx512.mask.compress.store.d.512"]
    unsafe fn vcompressstored(mem: *mut i8, data: i32x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.compress.store.d.256"]
    unsafe fn vcompressstored256(mem: *mut i8, data: i32x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.compress.store.d.128"]
    unsafe fn vcompressstored128(mem: *mut i8, data: i32x4, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.compress.store.q.512"]
    unsafe fn vcompressstoreq(mem: *mut i8, data: i64x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.compress.store.q.256"]
    unsafe fn vcompressstoreq256(mem: *mut i8, data: i64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.compress.store.q.128"]
    unsafe fn vcompressstoreq128(mem: *mut i8, data: i64x2, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.compress.store.ps.512"]
    unsafe fn vcompressstoreps(mem: *mut i8, data: f32x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.compress.store.ps.256"]
    unsafe fn vcompressstoreps256(mem: *mut i8, data: f32x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.compress.store.ps.128"]
    unsafe fn vcompressstoreps128(mem: *mut i8, data: f32x4, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.compress.store.pd.512"]
    unsafe fn vcompressstorepd(mem: *mut i8, data: f64x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.compress.store.pd.256"]
    unsafe fn vcompressstorepd256(mem: *mut i8, data: f64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.compress.store.pd.128"]
    unsafe fn vcompressstorepd128(mem: *mut i8, data: f64x2, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.expand.d.512"]
    unsafe fn vpexpandd(a: i32x16, src: i32x16, mask: u16) -> i32x16;
    #[link_name = "llvm.x86.avx512.mask.expand.d.256"]
    unsafe fn vpexpandd256(a: i32x8, src: i32x8, mask: u8) -> i32x8;
    #[link_name = "llvm.x86.avx512.mask.expand.d.128"]
    unsafe fn vpexpandd128(a: i32x4, src: i32x4, mask: u8) -> i32x4;

    #[link_name = "llvm.x86.avx512.mask.expand.q.512"]
    unsafe fn vpexpandq(a: i64x8, src: i64x8, mask: u8) -> i64x8;
    #[link_name = "llvm.x86.avx512.mask.expand.q.256"]
    unsafe fn vpexpandq256(a: i64x4, src: i64x4, mask: u8) -> i64x4;
    #[link_name = "llvm.x86.avx512.mask.expand.q.128"]
    unsafe fn vpexpandq128(a: i64x2, src: i64x2, mask: u8) -> i64x2;

    #[link_name = "llvm.x86.avx512.mask.expand.ps.512"]
    unsafe fn vexpandps(a: f32x16, src: f32x16, mask: u16) -> f32x16;
    #[link_name = "llvm.x86.avx512.mask.expand.ps.256"]
    unsafe fn vexpandps256(a: f32x8, src: f32x8, mask: u8) -> f32x8;
    #[link_name = "llvm.x86.avx512.mask.expand.ps.128"]
    unsafe fn vexpandps128(a: f32x4, src: f32x4, mask: u8) -> f32x4;

    #[link_name = "llvm.x86.avx512.mask.expand.pd.512"]
    unsafe fn vexpandpd(a: f64x8, src: f64x8, mask: u8) -> f64x8;
    #[link_name = "llvm.x86.avx512.mask.expand.pd.256"]
    unsafe fn vexpandpd256(a: f64x4, src: f64x4, mask: u8) -> f64x4;
    #[link_name = "llvm.x86.avx512.mask.expand.pd.128"]
    unsafe fn vexpandpd128(a: f64x2, src: f64x2, mask: u8) -> f64x2;

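    // Scalar (`ss`/`sd`) operations with write-mask and rounding/SAE control;
    // only element 0 is computed.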
43054 #[link_name = "llvm.x86.avx512.mask.add.ss.round"]
43055 unsafefn vaddss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
43056 #[link_name = "llvm.x86.avx512.mask.add.sd.round"]
43057 unsafefn vaddsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
43058 #[link_name = "llvm.x86.avx512.mask.sub.ss.round"]
43059 unsafefn vsubss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
43060 #[link_name = "llvm.x86.avx512.mask.sub.sd.round"]
43061 unsafefn vsubsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
43062 #[link_name = "llvm.x86.avx512.mask.mul.ss.round"]
43063 unsafefn vmulss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
43064 #[link_name = "llvm.x86.avx512.mask.mul.sd.round"]
43065 unsafefn vmulsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
43066 #[link_name = "llvm.x86.avx512.mask.div.ss.round"]
43067 unsafefn vdivss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
43068 #[link_name = "llvm.x86.avx512.mask.div.sd.round"]
43069 unsafefn vdivsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
43070 #[link_name = "llvm.x86.avx512.mask.max.ss.round"]
43071 unsafefn vmaxss(a: f32x4, b: f32x4, src: f32x4, mask: u8, sae: i32) -> f32x4;
43072 #[link_name = "llvm.x86.avx512.mask.max.sd.round"]
43073 unsafefn vmaxsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, sae: i32) -> f64x2;
43074 #[link_name = "llvm.x86.avx512.mask.min.ss.round"]
43075 unsafefn vminss(a: f32x4, b: f32x4, src: f32x4, mask: u8, sae: i32) -> f32x4;
43076 #[link_name = "llvm.x86.avx512.mask.min.sd.round"]
43077 unsafefn vminsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, sae: i32) -> f64x2;
43078 #[link_name = "llvm.x86.avx512.mask.sqrt.ss"]
43079 unsafefn vsqrtss(a: __m128, b: __m128, src: __m128, mask: u8, rounding: i32) -> __m128;
43080 #[link_name = "llvm.x86.avx512.mask.sqrt.sd"]
43081 unsafefn vsqrtsd(a: __m128d, b: __m128d, src: __m128d, mask: u8, rounding: i32) -> __m128d;
43082 #[link_name = "llvm.x86.avx512.mask.getexp.ss"]
43083 unsafefn vgetexpss(a: f32x4, b: f32x4, src: f32x4, mask: u8, sae: i32) -> f32x4;
43084 #[link_name = "llvm.x86.avx512.mask.getexp.sd"]
43085 unsafefn vgetexpsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, sae: i32) -> f64x2;
43086 #[link_name = "llvm.x86.avx512.mask.getmant.ss"]
43087 unsafefn vgetmantss(a: f32x4, b: f32x4, mantissas: i32, src: f32x4, m: u8, sae: i32) -> f32x4;
43088 #[link_name = "llvm.x86.avx512.mask.getmant.sd"]
43089 unsafefn vgetmantsd(a: f64x2, b: f64x2, mantissas: i32, src: f64x2, m: u8, sae: i32) -> f64x2;
43090
43091 #[link_name = "llvm.x86.avx512.rsqrt14.ss"]
43092 unsafefn vrsqrt14ss(a: f32x4, b: f32x4, src: f32x4, mask: u8) -> f32x4;
43093 #[link_name = "llvm.x86.avx512.rsqrt14.sd"]
43094 unsafefn vrsqrt14sd(a: f64x2, b: f64x2, src: f64x2, mask: u8) -> f64x2;
43095 #[link_name = "llvm.x86.avx512.rcp14.ss"]
43096 unsafefn vrcp14ss(a: f32x4, b: f32x4, src: f32x4, mask: u8) -> f32x4;
43097 #[link_name = "llvm.x86.avx512.rcp14.sd"]
43098 unsafefn vrcp14sd(a: f64x2, b: f64x2, src: f64x2, mask: u8) -> f64x2;
43099
43100 #[link_name = "llvm.x86.avx512.mask.rndscale.ss"]
43101 unsafefn vrndscaless(a: f32x4, b: f32x4, src: f32x4, mask: u8, imm8: i32, sae: i32) -> f32x4;
43102 #[link_name = "llvm.x86.avx512.mask.rndscale.sd"]
43103 unsafefn vrndscalesd(a: f64x2, b: f64x2, src: f64x2, mask: u8, imm8: i32, sae: i32) -> f64x2;
43104 #[link_name = "llvm.x86.avx512.mask.scalef.ss"]
43105 unsafefn vscalefss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
43106 #[link_name = "llvm.x86.avx512.mask.scalef.sd"]
43107 unsafefn vscalefsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
43108
    #[link_name = "llvm.x86.avx512.vfmadd.f32"]
    unsafe fn vfmaddssround(a: f32, b: f32, c: f32, rounding: i32) -> f32;
    #[link_name = "llvm.x86.avx512.vfmadd.f64"]
    unsafe fn vfmaddsdround(a: f64, b: f64, c: f64, rounding: i32) -> f64;

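    // Scalar VFIXUPIMM bindings: the integer vector `c` supplies the fixup lookup
    // table consulted per element.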
    #[link_name = "llvm.x86.avx512.mask.fixupimm.ss"]
    unsafe fn vfixupimmss(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8, sae: i32) -> f32x4;
    #[link_name = "llvm.x86.avx512.mask.fixupimm.sd"]
    unsafe fn vfixupimmsd(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8, sae: i32) -> f64x2;
    #[link_name = "llvm.x86.avx512.maskz.fixupimm.ss"]
    unsafe fn vfixupimmssz(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8, sae: i32) -> f32x4;
    #[link_name = "llvm.x86.avx512.maskz.fixupimm.sd"]
    unsafe fn vfixupimmsdz(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8, sae: i32) -> f64x2;

    #[link_name = "llvm.x86.avx512.mask.cvtss2sd.round"]
    unsafe fn vcvtss2sd(a: f64x2, b: f32x4, src: f64x2, mask: u8, sae: i32) -> f64x2;
    #[link_name = "llvm.x86.avx512.mask.cvtsd2ss.round"]
    unsafe fn vcvtsd2ss(a: f32x4, b: f64x2, src: f32x4, mask: u8, rounding: i32) -> f32x4;

    #[link_name = "llvm.x86.avx512.vcvtss2si32"]
    unsafe fn vcvtss2si(a: f32x4, rounding: i32) -> i32;
    #[link_name = "llvm.x86.avx512.vcvtss2usi32"]
    unsafe fn vcvtss2usi(a: f32x4, rounding: i32) -> u32;

    #[link_name = "llvm.x86.avx512.vcvtsd2si32"]
    unsafe fn vcvtsd2si(a: f64x2, rounding: i32) -> i32;
    #[link_name = "llvm.x86.avx512.vcvtsd2usi32"]
    unsafe fn vcvtsd2usi(a: f64x2, rounding: i32) -> u32;

    #[link_name = "llvm.x86.avx512.cvtsi2ss32"]
    unsafe fn vcvtsi2ss(a: f32x4, b: i32, rounding: i32) -> f32x4;

    #[link_name = "llvm.x86.avx512.cvtusi2ss"]
    unsafe fn vcvtusi2ss(a: f32x4, b: u32, rounding: i32) -> f32x4;

    #[link_name = "llvm.x86.avx512.cvttss2si"]
    unsafe fn vcvttss2si(a: f32x4, rounding: i32) -> i32;
    #[link_name = "llvm.x86.avx512.cvttss2usi"]
    unsafe fn vcvttss2usi(a: f32x4, rounding: i32) -> u32;

    #[link_name = "llvm.x86.avx512.cvttsd2si"]
    unsafe fn vcvttsd2si(a: f64x2, rounding: i32) -> i32;
    #[link_name = "llvm.x86.avx512.cvttsd2usi"]
    unsafe fn vcvttsd2usi(a: f64x2, rounding: i32) -> u32;

    #[link_name = "llvm.x86.avx512.vcomi.ss"]
    unsafe fn vcomiss(a: f32x4, b: f32x4, imm8: i32, sae: i32) -> i32;
    #[link_name = "llvm.x86.avx512.vcomi.sd"]
    unsafe fn vcomisd(a: f64x2, b: f64x2, imm8: i32, sae: i32) -> i32;

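    // Masked unaligned loads (vmovdqu32/64, vmovups/upd): lanes whose mask bit is
    // clear keep the corresponding value from `a`.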
    #[link_name = "llvm.x86.avx512.mask.loadu.d.128"]
    unsafe fn loaddqu32_128(mem_addr: *const i32, a: i32x4, mask: u8) -> i32x4;
    #[link_name = "llvm.x86.avx512.mask.loadu.q.128"]
    unsafe fn loaddqu64_128(mem_addr: *const i64, a: i64x2, mask: u8) -> i64x2;
    #[link_name = "llvm.x86.avx512.mask.loadu.ps.128"]
    unsafe fn loadups_128(mem_addr: *const f32, a: f32x4, mask: u8) -> f32x4;
    #[link_name = "llvm.x86.avx512.mask.loadu.pd.128"]
    unsafe fn loadupd_128(mem_addr: *const f64, a: f64x2, mask: u8) -> f64x2;
    #[link_name = "llvm.x86.avx512.mask.loadu.d.256"]
    unsafe fn loaddqu32_256(mem_addr: *const i32, a: i32x8, mask: u8) -> i32x8;
    #[link_name = "llvm.x86.avx512.mask.loadu.q.256"]
    unsafe fn loaddqu64_256(mem_addr: *const i64, a: i64x4, mask: u8) -> i64x4;
    #[link_name = "llvm.x86.avx512.mask.loadu.ps.256"]
    unsafe fn loadups_256(mem_addr: *const f32, a: f32x8, mask: u8) -> f32x8;
    #[link_name = "llvm.x86.avx512.mask.loadu.pd.256"]
    unsafe fn loadupd_256(mem_addr: *const f64, a: f64x4, mask: u8) -> f64x4;
    #[link_name = "llvm.x86.avx512.mask.loadu.d.512"]
    unsafe fn loaddqu32_512(mem_addr: *const i32, a: i32x16, mask: u16) -> i32x16;
    #[link_name = "llvm.x86.avx512.mask.loadu.q.512"]
    unsafe fn loaddqu64_512(mem_addr: *const i64, a: i64x8, mask: u8) -> i64x8;
    #[link_name = "llvm.x86.avx512.mask.loadu.ps.512"]
    unsafe fn loadups_512(mem_addr: *const f32, a: f32x16, mask: u16) -> f32x16;
    #[link_name = "llvm.x86.avx512.mask.loadu.pd.512"]
    unsafe fn loadupd_512(mem_addr: *const f64, a: f64x8, mask: u8) -> f64x8;

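    // Masked aligned loads (vmovdqa32/64, vmovaps/apd): `mem_addr` must meet the
    // full vector alignment requirement.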
    #[link_name = "llvm.x86.avx512.mask.load.d.128"]
    unsafe fn loaddqa32_128(mem_addr: *const i32, a: i32x4, mask: u8) -> i32x4;
    #[link_name = "llvm.x86.avx512.mask.load.q.128"]
    unsafe fn loaddqa64_128(mem_addr: *const i64, a: i64x2, mask: u8) -> i64x2;
    #[link_name = "llvm.x86.avx512.mask.load.ps.128"]
    unsafe fn loadaps_128(mem_addr: *const f32, a: f32x4, mask: u8) -> f32x4;
    #[link_name = "llvm.x86.avx512.mask.load.pd.128"]
    unsafe fn loadapd_128(mem_addr: *const f64, a: f64x2, mask: u8) -> f64x2;
    #[link_name = "llvm.x86.avx512.mask.load.d.256"]
    unsafe fn loaddqa32_256(mem_addr: *const i32, a: i32x8, mask: u8) -> i32x8;
    #[link_name = "llvm.x86.avx512.mask.load.q.256"]
    unsafe fn loaddqa64_256(mem_addr: *const i64, a: i64x4, mask: u8) -> i64x4;
    #[link_name = "llvm.x86.avx512.mask.load.ps.256"]
    unsafe fn loadaps_256(mem_addr: *const f32, a: f32x8, mask: u8) -> f32x8;
    #[link_name = "llvm.x86.avx512.mask.load.pd.256"]
    unsafe fn loadapd_256(mem_addr: *const f64, a: f64x4, mask: u8) -> f64x4;
    #[link_name = "llvm.x86.avx512.mask.load.d.512"]
    unsafe fn loaddqa32_512(mem_addr: *const i32, a: i32x16, mask: u16) -> i32x16;
    #[link_name = "llvm.x86.avx512.mask.load.q.512"]
    unsafe fn loaddqa64_512(mem_addr: *const i64, a: i64x8, mask: u8) -> i64x8;
    #[link_name = "llvm.x86.avx512.mask.load.ps.512"]
    unsafe fn loadaps_512(mem_addr: *const f32, a: f32x16, mask: u16) -> f32x16;
    #[link_name = "llvm.x86.avx512.mask.load.pd.512"]
    unsafe fn loadapd_512(mem_addr: *const f64, a: f64x8, mask: u8) -> f64x8;

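    // Masked unaligned stores: only lanes whose mask bit is set are written back
    // to memory.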
    #[link_name = "llvm.x86.avx512.mask.storeu.d.128"]
    unsafe fn storedqu32_128(mem_addr: *mut i32, a: i32x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.storeu.q.128"]
    unsafe fn storedqu64_128(mem_addr: *mut i64, a: i64x2, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.storeu.ps.128"]
    unsafe fn storeups_128(mem_addr: *mut f32, a: f32x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.storeu.pd.128"]
    unsafe fn storeupd_128(mem_addr: *mut f64, a: f64x2, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.storeu.d.256"]
    unsafe fn storedqu32_256(mem_addr: *mut i32, a: i32x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.storeu.q.256"]
    unsafe fn storedqu64_256(mem_addr: *mut i64, a: i64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.storeu.ps.256"]
    unsafe fn storeups_256(mem_addr: *mut f32, a: f32x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.storeu.pd.256"]
    unsafe fn storeupd_256(mem_addr: *mut f64, a: f64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.storeu.d.512"]
    unsafe fn storedqu32_512(mem_addr: *mut i32, a: i32x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.storeu.q.512"]
    unsafe fn storedqu64_512(mem_addr: *mut i64, a: i64x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.storeu.ps.512"]
    unsafe fn storeups_512(mem_addr: *mut f32, a: f32x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.storeu.pd.512"]
    unsafe fn storeupd_512(mem_addr: *mut f64, a: f64x8, mask: u8);

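    // Masked aligned stores; `mem_addr` must meet the full vector alignment
    // requirement.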
    #[link_name = "llvm.x86.avx512.mask.store.d.128"]
    unsafe fn storedqa32_128(mem_addr: *mut i32, a: i32x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.store.q.128"]
    unsafe fn storedqa64_128(mem_addr: *mut i64, a: i64x2, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.store.ps.128"]
    unsafe fn storeaps_128(mem_addr: *mut f32, a: f32x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.store.pd.128"]
    unsafe fn storeapd_128(mem_addr: *mut f64, a: f64x2, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.store.d.256"]
    unsafe fn storedqa32_256(mem_addr: *mut i32, a: i32x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.store.q.256"]
    unsafe fn storedqa64_256(mem_addr: *mut i64, a: i64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.store.ps.256"]
    unsafe fn storeaps_256(mem_addr: *mut f32, a: f32x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.store.pd.256"]
    unsafe fn storeapd_256(mem_addr: *mut f64, a: f64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.store.d.512"]
    unsafe fn storedqa32_512(mem_addr: *mut i32, a: i32x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.store.q.512"]
    unsafe fn storedqa64_512(mem_addr: *mut i64, a: i64x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.store.ps.512"]
    unsafe fn storeaps_512(mem_addr: *mut f32, a: f32x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.store.pd.512"]
    unsafe fn storeapd_512(mem_addr: *mut f64, a: f64x8, mask: u8);

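    // Masked expand loads: contiguous elements are read from memory and placed into
    // the destination lanes selected by the mask; unselected lanes keep the
    // corresponding value from `a`.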
    #[link_name = "llvm.x86.avx512.mask.expand.load.d.128"]
    unsafe fn expandloadd_128(mem_addr: *const i32, a: i32x4, mask: u8) -> i32x4;
    #[link_name = "llvm.x86.avx512.mask.expand.load.q.128"]
    unsafe fn expandloadq_128(mem_addr: *const i64, a: i64x2, mask: u8) -> i64x2;
    #[link_name = "llvm.x86.avx512.mask.expand.load.ps.128"]
    unsafe fn expandloadps_128(mem_addr: *const f32, a: f32x4, mask: u8) -> f32x4;
    #[link_name = "llvm.x86.avx512.mask.expand.load.pd.128"]
    unsafe fn expandloadpd_128(mem_addr: *const f64, a: f64x2, mask: u8) -> f64x2;
    #[link_name = "llvm.x86.avx512.mask.expand.load.d.256"]
    unsafe fn expandloadd_256(mem_addr: *const i32, a: i32x8, mask: u8) -> i32x8;
    #[link_name = "llvm.x86.avx512.mask.expand.load.q.256"]
    unsafe fn expandloadq_256(mem_addr: *const i64, a: i64x4, mask: u8) -> i64x4;
    #[link_name = "llvm.x86.avx512.mask.expand.load.ps.256"]
    unsafe fn expandloadps_256(mem_addr: *const f32, a: f32x8, mask: u8) -> f32x8;
    #[link_name = "llvm.x86.avx512.mask.expand.load.pd.256"]
    unsafe fn expandloadpd_256(mem_addr: *const f64, a: f64x4, mask: u8) -> f64x4;
    #[link_name = "llvm.x86.avx512.mask.expand.load.d.512"]
    unsafe fn expandloadd_512(mem_addr: *const i32, a: i32x16, mask: u16) -> i32x16;
    #[link_name = "llvm.x86.avx512.mask.expand.load.q.512"]
    unsafe fn expandloadq_512(mem_addr: *const i64, a: i64x8, mask: u8) -> i64x8;
    #[link_name = "llvm.x86.avx512.mask.expand.load.ps.512"]
    unsafe fn expandloadps_512(mem_addr: *const f32, a: f32x16, mask: u16) -> f32x16;
    #[link_name = "llvm.x86.avx512.mask.expand.load.pd.512"]
    unsafe fn expandloadpd_512(mem_addr: *const f64, a: f64x8, mask: u8) -> f64x8;

}

#[cfg(test)]
mod tests {

    use stdarch_test::simd_test;

    use crate::core_arch::x86::*;
    use crate::hint::black_box;
    use crate::mem::{self};

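    // Mask convention used throughout these tests: bit i of the mask corresponds to
    // element i, so with the `setr` constructors (memory order) a mask such as
    // 0b00000000_11111111 selects the first eight elements. A mask of 0 yields `src`
    // unchanged for writemask variants and all zeros for zeromask (`maskz`) variants.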
43295 #[simd_test(enable = "avx512f")]
43296 unsafe fn test_mm512_abs_epi32() {
43297 #[rustfmt::skip]
43298 let a = _mm512_setr_epi32(
43299 0, 1, -1, i32::MAX,
43300 i32::MIN, 100, -100, -32,
43301 0, 1, -1, i32::MAX,
43302 i32::MIN, 100, -100, -32,
43303 );
43304 let r = _mm512_abs_epi32(a);
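        // `i32::MIN` has no positive counterpart: the absolute value wraps, and
        // `i32::MAX.wrapping_add(1)` is exactly `i32::MIN`.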
43305 #[rustfmt::skip]
43306 let e = _mm512_setr_epi32(
43307 0, 1, 1, i32::MAX,
43308 i32::MAX.wrapping_add(1), 100, 100, 32,
43309 0, 1, 1, i32::MAX,
43310 i32::MAX.wrapping_add(1), 100, 100, 32,
43311 );
43312 assert_eq_m512i(r, e);
43313 }
43314
43315 #[simd_test(enable = "avx512f")]
43316 unsafe fn test_mm512_mask_abs_epi32() {
43317 #[rustfmt::skip]
43318 let a = _mm512_setr_epi32(
43319 0, 1, -1, i32::MAX,
43320 i32::MIN, 100, -100, -32,
43321 0, 1, -1, i32::MAX,
43322 i32::MIN, 100, -100, -32,
43323 );
43324 let r = _mm512_mask_abs_epi32(a, 0, a);
43325 assert_eq_m512i(r, a);
43326 let r = _mm512_mask_abs_epi32(a, 0b00000000_11111111, a);
43327 #[rustfmt::skip]
43328 let e = _mm512_setr_epi32(
43329 0, 1, 1, i32::MAX,
43330 i32::MAX.wrapping_add(1), 100, 100, 32,
43331 0, 1, -1, i32::MAX,
43332 i32::MIN, 100, -100, -32,
43333 );
43334 assert_eq_m512i(r, e);
43335 }
43336
43337 #[simd_test(enable = "avx512f")]
43338 unsafe fn test_mm512_maskz_abs_epi32() {
43339 #[rustfmt::skip]
43340 let a = _mm512_setr_epi32(
43341 0, 1, -1, i32::MAX,
43342 i32::MIN, 100, -100, -32,
43343 0, 1, -1, i32::MAX,
43344 i32::MIN, 100, -100, -32,
43345 );
43346 let r = _mm512_maskz_abs_epi32(0, a);
43347 assert_eq_m512i(r, _mm512_setzero_si512());
43348 let r = _mm512_maskz_abs_epi32(0b00000000_11111111, a);
43349 #[rustfmt::skip]
43350 let e = _mm512_setr_epi32(
43351 0, 1, 1, i32::MAX,
43352 i32::MAX.wrapping_add(1), 100, 100, 32,
43353 0, 0, 0, 0,
43354 0, 0, 0, 0,
43355 );
43356 assert_eq_m512i(r, e);
43357 }
43358
43359 #[simd_test(enable = "avx512f,avx512vl")]
43360 unsafe fn test_mm256_mask_abs_epi32() {
43361 #[rustfmt::skip]
43362 let a = _mm256_setr_epi32(
43363 0, 1, -1, i32::MAX,
43364 i32::MIN, 100, -100, -32,
43365 );
43366 let r = _mm256_mask_abs_epi32(a, 0, a);
43367 assert_eq_m256i(r, a);
43368 let r = _mm256_mask_abs_epi32(a, 0b00001111, a);
43369 #[rustfmt::skip]
43370 let e = _mm256_setr_epi32(
43371 0, 1, 1, i32::MAX,
43372 i32::MAX.wrapping_add(1), 100, -100, -32,
43373 );
43374 assert_eq_m256i(r, e);
43375 }
43376
43377 #[simd_test(enable = "avx512f,avx512vl")]
43378 unsafe fn test_mm256_maskz_abs_epi32() {
43379 #[rustfmt::skip]
43380 let a = _mm256_setr_epi32(
43381 0, 1, -1, i32::MAX,
43382 i32::MIN, 100, -100, -32,
43383 );
43384 let r = _mm256_maskz_abs_epi32(0, a);
43385 assert_eq_m256i(r, _mm256_setzero_si256());
43386 let r = _mm256_maskz_abs_epi32(0b00001111, a);
43387 #[rustfmt::skip]
43388 let e = _mm256_setr_epi32(
43389 0, 1, 1, i32::MAX,
43390 0, 0, 0, 0,
43391 );
43392 assert_eq_m256i(r, e);
43393 }
43394
43395 #[simd_test(enable = "avx512f,avx512vl")]
43396 unsafe fn test_mm_mask_abs_epi32() {
43397 let a = _mm_setr_epi32(i32::MIN, 100, -100, -32);
43398 let r = _mm_mask_abs_epi32(a, 0, a);
43399 assert_eq_m128i(r, a);
43400 let r = _mm_mask_abs_epi32(a, 0b00001111, a);
43401 let e = _mm_setr_epi32(i32::MAX.wrapping_add(1), 100, 100, 32);
43402 assert_eq_m128i(r, e);
43403 }
43404
43405 #[simd_test(enable = "avx512f,avx512vl")]
43406 unsafe fn test_mm_maskz_abs_epi32() {
43407 let a = _mm_setr_epi32(i32::MIN, 100, -100, -32);
43408 let r = _mm_maskz_abs_epi32(0, a);
43409 assert_eq_m128i(r, _mm_setzero_si128());
43410 let r = _mm_maskz_abs_epi32(0b00001111, a);
43411 let e = _mm_setr_epi32(i32::MAX.wrapping_add(1), 100, 100, 32);
43412 assert_eq_m128i(r, e);
43413 }
43414
43415 #[simd_test(enable = "avx512f")]
43416 unsafe fn test_mm512_abs_ps() {
43417 #[rustfmt::skip]
43418 let a = _mm512_setr_ps(
43419 0., 1., -1., f32::MAX,
43420 f32::MIN, 100., -100., -32.,
43421 0., 1., -1., f32::MAX,
43422 f32::MIN, 100., -100., -32.,
43423 );
43424 let r = _mm512_abs_ps(a);
43425 #[rustfmt::skip]
43426 let e = _mm512_setr_ps(
43427 0., 1., 1., f32::MAX,
43428 f32::MAX, 100., 100., 32.,
43429 0., 1., 1., f32::MAX,
43430 f32::MAX, 100., 100., 32.,
43431 );
43432 assert_eq_m512(r, e);
43433 }
43434
43435 #[simd_test(enable = "avx512f")]
43436 unsafe fn test_mm512_mask_abs_ps() {
43437 #[rustfmt::skip]
43438 let a = _mm512_setr_ps(
43439 0., 1., -1., f32::MAX,
43440 f32::MIN, 100., -100., -32.,
43441 0., 1., -1., f32::MAX,
43442 f32::MIN, 100., -100., -32.,
43443 );
43444 let r = _mm512_mask_abs_ps(a, 0, a);
43445 assert_eq_m512(r, a);
43446 let r = _mm512_mask_abs_ps(a, 0b00000000_11111111, a);
43447 #[rustfmt::skip]
43448 let e = _mm512_setr_ps(
43449 0., 1., 1., f32::MAX,
43450 f32::MAX, 100., 100., 32.,
43451 0., 1., -1., f32::MAX,
43452 f32::MIN, 100., -100., -32.,
43453 );
43454 assert_eq_m512(r, e);
43455 }
43456
43457 #[simd_test(enable = "avx512f")]
43458 unsafe fn test_mm512_mask_mov_epi32() {
43459 let src = _mm512_set1_epi32(1);
43460 let a = _mm512_set1_epi32(2);
43461 let r = _mm512_mask_mov_epi32(src, 0, a);
43462 assert_eq_m512i(r, src);
43463 let r = _mm512_mask_mov_epi32(src, 0b11111111_11111111, a);
43464 assert_eq_m512i(r, a);
43465 }
43466
43467 #[simd_test(enable = "avx512f")]
43468 unsafe fn test_mm512_maskz_mov_epi32() {
43469 let a = _mm512_set1_epi32(2);
43470 let r = _mm512_maskz_mov_epi32(0, a);
43471 assert_eq_m512i(r, _mm512_setzero_si512());
43472 let r = _mm512_maskz_mov_epi32(0b11111111_11111111, a);
43473 assert_eq_m512i(r, a);
43474 }
43475
43476 #[simd_test(enable = "avx512f,avx512vl")]
43477 unsafe fn test_mm256_mask_mov_epi32() {
43478 let src = _mm256_set1_epi32(1);
43479 let a = _mm256_set1_epi32(2);
43480 let r = _mm256_mask_mov_epi32(src, 0, a);
43481 assert_eq_m256i(r, src);
43482 let r = _mm256_mask_mov_epi32(src, 0b11111111, a);
43483 assert_eq_m256i(r, a);
43484 }
43485
43486 #[simd_test(enable = "avx512f,avx512vl")]
43487 unsafe fn test_mm256_maskz_mov_epi32() {
43488 let a = _mm256_set1_epi32(2);
43489 let r = _mm256_maskz_mov_epi32(0, a);
43490 assert_eq_m256i(r, _mm256_setzero_si256());
43491 let r = _mm256_maskz_mov_epi32(0b11111111, a);
43492 assert_eq_m256i(r, a);
43493 }
43494
43495 #[simd_test(enable = "avx512f,avx512vl")]
43496 unsafe fn test_mm_mask_mov_epi32() {
43497 let src = _mm_set1_epi32(1);
43498 let a = _mm_set1_epi32(2);
43499 let r = _mm_mask_mov_epi32(src, 0, a);
43500 assert_eq_m128i(r, src);
43501 let r = _mm_mask_mov_epi32(src, 0b00001111, a);
43502 assert_eq_m128i(r, a);
43503 }
43504
43505 #[simd_test(enable = "avx512f,avx512vl")]
43506 unsafe fn test_mm_maskz_mov_epi32() {
43507 let a = _mm_set1_epi32(2);
43508 let r = _mm_maskz_mov_epi32(0, a);
43509 assert_eq_m128i(r, _mm_setzero_si128());
43510 let r = _mm_maskz_mov_epi32(0b00001111, a);
43511 assert_eq_m128i(r, a);
43512 }
43513
43514 #[simd_test(enable = "avx512f")]
43515 unsafe fn test_mm512_mask_mov_ps() {
43516 let src = _mm512_set1_ps(1.);
43517 let a = _mm512_set1_ps(2.);
43518 let r = _mm512_mask_mov_ps(src, 0, a);
43519 assert_eq_m512(r, src);
43520 let r = _mm512_mask_mov_ps(src, 0b11111111_11111111, a);
43521 assert_eq_m512(r, a);
43522 }
43523
43524 #[simd_test(enable = "avx512f")]
43525 unsafe fn test_mm512_maskz_mov_ps() {
43526 let a = _mm512_set1_ps(2.);
43527 let r = _mm512_maskz_mov_ps(0, a);
43528 assert_eq_m512(r, _mm512_setzero_ps());
43529 let r = _mm512_maskz_mov_ps(0b11111111_11111111, a);
43530 assert_eq_m512(r, a);
43531 }
43532
43533 #[simd_test(enable = "avx512f,avx512vl")]
43534 unsafe fn test_mm256_mask_mov_ps() {
43535 let src = _mm256_set1_ps(1.);
43536 let a = _mm256_set1_ps(2.);
43537 let r = _mm256_mask_mov_ps(src, 0, a);
43538 assert_eq_m256(r, src);
43539 let r = _mm256_mask_mov_ps(src, 0b11111111, a);
43540 assert_eq_m256(r, a);
43541 }
43542
43543 #[simd_test(enable = "avx512f,avx512vl")]
43544 unsafe fn test_mm256_maskz_mov_ps() {
43545 let a = _mm256_set1_ps(2.);
43546 let r = _mm256_maskz_mov_ps(0, a);
43547 assert_eq_m256(r, _mm256_setzero_ps());
43548 let r = _mm256_maskz_mov_ps(0b11111111, a);
43549 assert_eq_m256(r, a);
43550 }
43551
43552 #[simd_test(enable = "avx512f,avx512vl")]
43553 unsafe fn test_mm_mask_mov_ps() {
43554 let src = _mm_set1_ps(1.);
43555 let a = _mm_set1_ps(2.);
43556 let r = _mm_mask_mov_ps(src, 0, a);
43557 assert_eq_m128(r, src);
43558 let r = _mm_mask_mov_ps(src, 0b00001111, a);
43559 assert_eq_m128(r, a);
43560 }
43561
43562 #[simd_test(enable = "avx512f,avx512vl")]
43563 unsafe fn test_mm_maskz_mov_ps() {
43564 let a = _mm_set1_ps(2.);
43565 let r = _mm_maskz_mov_ps(0, a);
43566 assert_eq_m128(r, _mm_setzero_ps());
43567 let r = _mm_maskz_mov_ps(0b00001111, a);
43568 assert_eq_m128(r, a);
43569 }
43570
43571 #[simd_test(enable = "avx512f")]
43572 unsafe fn test_mm512_add_epi32() {
43573 #[rustfmt::skip]
43574 let a = _mm512_setr_epi32(
43575 0, 1, -1, i32::MAX,
43576 i32::MIN, 100, -100, -32,
43577 0, 1, -1, i32::MAX,
43578 i32::MIN, 100, -100, -32,
43579 );
43580 let b = _mm512_set1_epi32(1);
43581 let r = _mm512_add_epi32(a, b);
43582 #[rustfmt::skip]
43583 let e = _mm512_setr_epi32(
43584 1, 2, 0, i32::MIN,
43585 i32::MIN + 1, 101, -99, -31,
43586 1, 2, 0, i32::MIN,
43587 i32::MIN + 1, 101, -99, -31,
43588 );
43589 assert_eq_m512i(r, e);
43590 }
43591
43592 #[simd_test(enable = "avx512f")]
43593 unsafe fn test_mm512_mask_add_epi32() {
43594 #[rustfmt::skip]
43595 let a = _mm512_setr_epi32(
43596 0, 1, -1, i32::MAX,
43597 i32::MIN, 100, -100, -32,
43598 0, 1, -1, i32::MAX,
43599 i32::MIN, 100, -100, -32,
43600 );
43601 let b = _mm512_set1_epi32(1);
43602 let r = _mm512_mask_add_epi32(a, 0, a, b);
43603 assert_eq_m512i(r, a);
43604 let r = _mm512_mask_add_epi32(a, 0b00000000_11111111, a, b);
43605 #[rustfmt::skip]
43606 let e = _mm512_setr_epi32(
43607 1, 2, 0, i32::MIN,
43608 i32::MIN + 1, 101, -99, -31,
43609 0, 1, -1, i32::MAX,
43610 i32::MIN, 100, -100, -32,
43611 );
43612 assert_eq_m512i(r, e);
43613 }
43614
43615 #[simd_test(enable = "avx512f")]
43616 unsafe fn test_mm512_maskz_add_epi32() {
43617 #[rustfmt::skip]
43618 let a = _mm512_setr_epi32(
43619 0, 1, -1, i32::MAX,
43620 i32::MIN, 100, -100, -32,
43621 0, 1, -1, i32::MAX,
43622 i32::MIN, 100, -100, -32,
43623 );
43624 let b = _mm512_set1_epi32(1);
43625 let r = _mm512_maskz_add_epi32(0, a, b);
43626 assert_eq_m512i(r, _mm512_setzero_si512());
43627 let r = _mm512_maskz_add_epi32(0b00000000_11111111, a, b);
43628 #[rustfmt::skip]
43629 let e = _mm512_setr_epi32(
43630 1, 2, 0, i32::MIN,
43631 i32::MIN + 1, 101, -99, -31,
43632 0, 0, 0, 0,
43633 0, 0, 0, 0,
43634 );
43635 assert_eq_m512i(r, e);
43636 }
43637
43638 #[simd_test(enable = "avx512f,avx512vl")]
43639 unsafe fn test_mm256_mask_add_epi32() {
43640 let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
43641 let b = _mm256_set1_epi32(1);
43642 let r = _mm256_mask_add_epi32(a, 0, a, b);
43643 assert_eq_m256i(r, a);
43644 let r = _mm256_mask_add_epi32(a, 0b11111111, a, b);
43645 let e = _mm256_set_epi32(1, 2, 0, i32::MIN, i32::MIN + 1, 101, -99, -31);
43646 assert_eq_m256i(r, e);
43647 }
43648
43649 #[simd_test(enable = "avx512f,avx512vl")]
43650 unsafe fn test_mm256_maskz_add_epi32() {
43651 let a = _mm256_setr_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
43652 let b = _mm256_set1_epi32(1);
43653 let r = _mm256_maskz_add_epi32(0, a, b);
43654 assert_eq_m256i(r, _mm256_setzero_si256());
43655 let r = _mm256_maskz_add_epi32(0b11111111, a, b);
43656 let e = _mm256_setr_epi32(1, 2, 0, i32::MIN, i32::MIN + 1, 101, -99, -31);
43657 assert_eq_m256i(r, e);
43658 }
43659
43660 #[simd_test(enable = "avx512f,avx512vl")]
43661 unsafe fn test_mm_mask_add_epi32() {
43662 let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
43663 let b = _mm_set1_epi32(1);
43664 let r = _mm_mask_add_epi32(a, 0, a, b);
43665 assert_eq_m128i(r, a);
43666 let r = _mm_mask_add_epi32(a, 0b00001111, a, b);
43667 let e = _mm_set_epi32(2, 0, i32::MIN, i32::MIN + 1);
43668 assert_eq_m128i(r, e);
43669 }
43670
43671 #[simd_test(enable = "avx512f,avx512vl")]
43672 unsafe fn test_mm_maskz_add_epi32() {
43673 let a = _mm_setr_epi32(1, -1, i32::MAX, i32::MIN);
43674 let b = _mm_set1_epi32(1);
43675 let r = _mm_maskz_add_epi32(0, a, b);
43676 assert_eq_m128i(r, _mm_setzero_si128());
43677 let r = _mm_maskz_add_epi32(0b00001111, a, b);
43678 let e = _mm_setr_epi32(2, 0, i32::MIN, i32::MIN + 1);
43679 assert_eq_m128i(r, e);
43680 }
43681
43682 #[simd_test(enable = "avx512f")]
43683 unsafe fn test_mm512_add_ps() {
43684 #[rustfmt::skip]
43685 let a = _mm512_setr_ps(
43686 0., 1., -1., f32::MAX,
43687 f32::MIN, 100., -100., -32.,
43688 0., 1., -1., f32::MAX,
43689 f32::MIN, 100., -100., -32.,
43690 );
43691 let b = _mm512_set1_ps(1.);
43692 let r = _mm512_add_ps(a, b);
43693 #[rustfmt::skip]
43694 let e = _mm512_setr_ps(
43695 1., 2., 0., f32::MAX,
43696 f32::MIN + 1., 101., -99., -31.,
43697 1., 2., 0., f32::MAX,
43698 f32::MIN + 1., 101., -99., -31.,
43699 );
43700 assert_eq_m512(r, e);
43701 }
43702
43703 #[simd_test(enable = "avx512f")]
43704 unsafe fn test_mm512_mask_add_ps() {
43705 #[rustfmt::skip]
43706 let a = _mm512_setr_ps(
43707 0., 1., -1., f32::MAX,
43708 f32::MIN, 100., -100., -32.,
43709 0., 1., -1., f32::MAX,
43710 f32::MIN, 100., -100., -32.,
43711 );
43712 let b = _mm512_set1_ps(1.);
43713 let r = _mm512_mask_add_ps(a, 0, a, b);
43714 assert_eq_m512(r, a);
43715 let r = _mm512_mask_add_ps(a, 0b00000000_11111111, a, b);
43716 #[rustfmt::skip]
43717 let e = _mm512_setr_ps(
43718 1., 2., 0., f32::MAX,
43719 f32::MIN + 1., 101., -99., -31.,
43720 0., 1., -1., f32::MAX,
43721 f32::MIN, 100., -100., -32.,
43722 );
43723 assert_eq_m512(r, e);
43724 }
43725
43726 #[simd_test(enable = "avx512f")]
43727 unsafe fn test_mm512_maskz_add_ps() {
43728 #[rustfmt::skip]
43729 let a = _mm512_setr_ps(
43730 0., 1., -1., f32::MAX,
43731 f32::MIN, 100., -100., -32.,
43732 0., 1., -1., f32::MAX,
43733 f32::MIN, 100., -100., -32.,
43734 );
43735 let b = _mm512_set1_ps(1.);
43736 let r = _mm512_maskz_add_ps(0, a, b);
43737 assert_eq_m512(r, _mm512_setzero_ps());
43738 let r = _mm512_maskz_add_ps(0b00000000_11111111, a, b);
43739 #[rustfmt::skip]
43740 let e = _mm512_setr_ps(
43741 1., 2., 0., f32::MAX,
43742 f32::MIN + 1., 101., -99., -31.,
43743 0., 0., 0., 0.,
43744 0., 0., 0., 0.,
43745 );
43746 assert_eq_m512(r, e);
43747 }
43748
43749 #[simd_test(enable = "avx512f,avx512vl")]
43750 unsafe fn test_mm256_mask_add_ps() {
43751 let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
43752 let b = _mm256_set1_ps(1.);
43753 let r = _mm256_mask_add_ps(a, 0, a, b);
43754 assert_eq_m256(r, a);
43755 let r = _mm256_mask_add_ps(a, 0b11111111, a, b);
43756 let e = _mm256_set_ps(1., 2., 0., f32::MAX, f32::MIN + 1., 101., -99., -31.);
43757 assert_eq_m256(r, e);
43758 }
43759
43760 #[simd_test(enable = "avx512f,avx512vl")]
43761 unsafe fn test_mm256_maskz_add_ps() {
43762 let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
43763 let b = _mm256_set1_ps(1.);
43764 let r = _mm256_maskz_add_ps(0, a, b);
43765 assert_eq_m256(r, _mm256_setzero_ps());
43766 let r = _mm256_maskz_add_ps(0b11111111, a, b);
43767 let e = _mm256_set_ps(1., 2., 0., f32::MAX, f32::MIN + 1., 101., -99., -31.);
43768 assert_eq_m256(r, e);
43769 }
43770
43771 #[simd_test(enable = "avx512f,avx512vl")]
43772 unsafe fn test_mm_mask_add_ps() {
43773 let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
43774 let b = _mm_set1_ps(1.);
43775 let r = _mm_mask_add_ps(a, 0, a, b);
43776 assert_eq_m128(r, a);
43777 let r = _mm_mask_add_ps(a, 0b00001111, a, b);
43778 let e = _mm_set_ps(2., 0., f32::MAX, f32::MIN + 1.);
43779 assert_eq_m128(r, e);
43780 }
43781
43782 #[simd_test(enable = "avx512f,avx512vl")]
43783 unsafe fn test_mm_maskz_add_ps() {
43784 let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
43785 let b = _mm_set1_ps(1.);
43786 let r = _mm_maskz_add_ps(0, a, b);
43787 assert_eq_m128(r, _mm_setzero_ps());
43788 let r = _mm_maskz_add_ps(0b00001111, a, b);
43789 let e = _mm_set_ps(2., 0., f32::MAX, f32::MIN + 1.);
43790 assert_eq_m128(r, e);
43791 }
43792
43793 #[simd_test(enable = "avx512f")]
43794 unsafe fn test_mm512_sub_epi32() {
43795 #[rustfmt::skip]
43796 let a = _mm512_setr_epi32(
43797 0, 1, -1, i32::MAX,
43798 i32::MIN, 100, -100, -32,
43799 0, 1, -1, i32::MAX,
43800 i32::MIN, 100, -100, -32,
43801 );
43802 let b = _mm512_set1_epi32(1);
43803 let r = _mm512_sub_epi32(a, b);
43804 #[rustfmt::skip]
43805 let e = _mm512_setr_epi32(
43806 -1, 0, -2, i32::MAX - 1,
43807 i32::MAX, 99, -101, -33,
43808 -1, 0, -2, i32::MAX - 1,
43809 i32::MAX, 99, -101, -33,
43810 );
43811 assert_eq_m512i(r, e);
43812 }
43813
43814 #[simd_test(enable = "avx512f")]
43815 unsafe fn test_mm512_mask_sub_epi32() {
43816 #[rustfmt::skip]
43817 let a = _mm512_setr_epi32(
43818 0, 1, -1, i32::MAX,
43819 i32::MIN, 100, -100, -32,
43820 0, 1, -1, i32::MAX,
43821 i32::MIN, 100, -100, -32,
43822 );
43823 let b = _mm512_set1_epi32(1);
43824 let r = _mm512_mask_sub_epi32(a, 0, a, b);
43825 assert_eq_m512i(r, a);
43826 let r = _mm512_mask_sub_epi32(a, 0b00000000_11111111, a, b);
43827 #[rustfmt::skip]
43828 let e = _mm512_setr_epi32(
43829 -1, 0, -2, i32::MAX - 1,
43830 i32::MAX, 99, -101, -33,
43831 0, 1, -1, i32::MAX,
43832 i32::MIN, 100, -100, -32,
43833 );
43834 assert_eq_m512i(r, e);
43835 }
43836
43837 #[simd_test(enable = "avx512f")]
43838 unsafe fn test_mm512_maskz_sub_epi32() {
43839 #[rustfmt::skip]
43840 let a = _mm512_setr_epi32(
43841 0, 1, -1, i32::MAX,
43842 i32::MIN, 100, -100, -32,
43843 0, 1, -1, i32::MAX,
43844 i32::MIN, 100, -100, -32,
43845 );
43846 let b = _mm512_set1_epi32(1);
43847 let r = _mm512_maskz_sub_epi32(0, a, b);
43848 assert_eq_m512i(r, _mm512_setzero_si512());
43849 let r = _mm512_maskz_sub_epi32(0b00000000_11111111, a, b);
43850 #[rustfmt::skip]
43851 let e = _mm512_setr_epi32(
43852 -1, 0, -2, i32::MAX - 1,
43853 i32::MAX, 99, -101, -33,
43854 0, 0, 0, 0,
43855 0, 0, 0, 0,
43856 );
43857 assert_eq_m512i(r, e);
43858 }
43859
43860 #[simd_test(enable = "avx512f,avx512vl")]
43861 unsafe fn test_mm256_mask_sub_epi32() {
43862 let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
43863 let b = _mm256_set1_epi32(1);
43864 let r = _mm256_mask_sub_epi32(a, 0, a, b);
43865 assert_eq_m256i(r, a);
43866 let r = _mm256_mask_sub_epi32(a, 0b11111111, a, b);
43867 let e = _mm256_set_epi32(-1, 0, -2, i32::MAX - 1, i32::MAX, 99, -101, -33);
43868 assert_eq_m256i(r, e);
43869 }
43870
43871 #[simd_test(enable = "avx512f,avx512vl")]
43872 unsafe fn test_mm256_maskz_sub_epi32() {
43873 let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
43874 let b = _mm256_set1_epi32(1);
43875 let r = _mm256_maskz_sub_epi32(0, a, b);
43876 assert_eq_m256i(r, _mm256_setzero_si256());
43877 let r = _mm256_maskz_sub_epi32(0b11111111, a, b);
43878 let e = _mm256_set_epi32(-1, 0, -2, i32::MAX - 1, i32::MAX, 99, -101, -33);
43879 assert_eq_m256i(r, e);
43880 }
43881
43882 #[simd_test(enable = "avx512f,avx512vl")]
43883 unsafe fn test_mm_mask_sub_epi32() {
43884 let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
43885 let b = _mm_set1_epi32(1);
43886 let r = _mm_mask_sub_epi32(a, 0, a, b);
43887 assert_eq_m128i(r, a);
43888 let r = _mm_mask_sub_epi32(a, 0b00001111, a, b);
43889 let e = _mm_set_epi32(0, -2, i32::MAX - 1, i32::MAX);
43890 assert_eq_m128i(r, e);
43891 }
43892
43893 #[simd_test(enable = "avx512f,avx512vl")]
43894 unsafe fn test_mm_maskz_sub_epi32() {
43895 let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
43896 let b = _mm_set1_epi32(1);
43897 let r = _mm_maskz_sub_epi32(0, a, b);
43898 assert_eq_m128i(r, _mm_setzero_si128());
43899 let r = _mm_maskz_sub_epi32(0b00001111, a, b);
43900 let e = _mm_set_epi32(0, -2, i32::MAX - 1, i32::MAX);
43901 assert_eq_m128i(r, e);
43902 }
43903
43904 #[simd_test(enable = "avx512f")]
43905 unsafe fn test_mm512_sub_ps() {
43906 #[rustfmt::skip]
43907 let a = _mm512_setr_ps(
43908 0., 1., -1., f32::MAX,
43909 f32::MIN, 100., -100., -32.,
43910 0., 1., -1., f32::MAX,
43911 f32::MIN, 100., -100., -32.,
43912 );
43913 let b = _mm512_set1_ps(1.);
43914 let r = _mm512_sub_ps(a, b);
43915 #[rustfmt::skip]
43916 let e = _mm512_setr_ps(
43917 -1., 0., -2., f32::MAX - 1.,
43918 f32::MIN, 99., -101., -33.,
43919 -1., 0., -2., f32::MAX - 1.,
43920 f32::MIN, 99., -101., -33.,
43921 );
43922 assert_eq_m512(r, e);
43923 }
43924
43925 #[simd_test(enable = "avx512f")]
43926 unsafe fn test_mm512_mask_sub_ps() {
43927 #[rustfmt::skip]
43928 let a = _mm512_setr_ps(
43929 0., 1., -1., f32::MAX,
43930 f32::MIN, 100., -100., -32.,
43931 0., 1., -1., f32::MAX,
43932 f32::MIN, 100., -100., -32.,
43933 );
43934 let b = _mm512_set1_ps(1.);
43935 let r = _mm512_mask_sub_ps(a, 0, a, b);
43936 assert_eq_m512(r, a);
43937 let r = _mm512_mask_sub_ps(a, 0b00000000_11111111, a, b);
43938 #[rustfmt::skip]
43939 let e = _mm512_setr_ps(
43940 -1., 0., -2., f32::MAX - 1.,
43941 f32::MIN, 99., -101., -33.,
43942 0., 1., -1., f32::MAX,
43943 f32::MIN, 100., -100., -32.,
43944 );
43945 assert_eq_m512(r, e);
43946 }
43947
43948 #[simd_test(enable = "avx512f")]
43949 unsafe fn test_mm512_maskz_sub_ps() {
43950 #[rustfmt::skip]
43951 let a = _mm512_setr_ps(
43952 0., 1., -1., f32::MAX,
43953 f32::MIN, 100., -100., -32.,
43954 0., 1., -1., f32::MAX,
43955 f32::MIN, 100., -100., -32.,
43956 );
43957 let b = _mm512_set1_ps(1.);
43958 let r = _mm512_maskz_sub_ps(0, a, b);
43959 assert_eq_m512(r, _mm512_setzero_ps());
43960 let r = _mm512_maskz_sub_ps(0b00000000_11111111, a, b);
43961 #[rustfmt::skip]
43962 let e = _mm512_setr_ps(
43963 -1., 0., -2., f32::MAX - 1.,
43964 f32::MIN, 99., -101., -33.,
43965 0., 0., 0., 0.,
43966 0., 0., 0., 0.,
43967 );
43968 assert_eq_m512(r, e);
43969 }
43970
43971 #[simd_test(enable = "avx512f,avx512vl")]
43972 unsafe fn test_mm256_mask_sub_ps() {
43973 let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
43974 let b = _mm256_set1_ps(1.);
43975 let r = _mm256_mask_sub_ps(a, 0, a, b);
43976 assert_eq_m256(r, a);
43977 let r = _mm256_mask_sub_ps(a, 0b11111111, a, b);
43978 let e = _mm256_set_ps(-1., 0., -2., f32::MAX - 1., f32::MIN, 99., -101., -33.);
43979 assert_eq_m256(r, e);
43980 }
43981
43982 #[simd_test(enable = "avx512f,avx512vl")]
43983 unsafe fn test_mm256_maskz_sub_ps() {
43984 let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
43985 let b = _mm256_set1_ps(1.);
43986 let r = _mm256_maskz_sub_ps(0, a, b);
43987 assert_eq_m256(r, _mm256_setzero_ps());
43988 let r = _mm256_maskz_sub_ps(0b11111111, a, b);
43989 let e = _mm256_set_ps(-1., 0., -2., f32::MAX - 1., f32::MIN, 99., -101., -33.);
43990 assert_eq_m256(r, e);
43991 }
43992
43993 #[simd_test(enable = "avx512f,avx512vl")]
43994 unsafe fn test_mm_mask_sub_ps() {
43995 let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
43996 let b = _mm_set1_ps(1.);
43997 let r = _mm_mask_sub_ps(a, 0, a, b);
43998 assert_eq_m128(r, a);
43999 let r = _mm_mask_sub_ps(a, 0b00001111, a, b);
44000 let e = _mm_set_ps(0., -2., f32::MAX - 1., f32::MIN);
44001 assert_eq_m128(r, e);
44002 }
44003
44004 #[simd_test(enable = "avx512f,avx512vl")]
44005 unsafe fn test_mm_maskz_sub_ps() {
44006 let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
44007 let b = _mm_set1_ps(1.);
44008 let r = _mm_maskz_sub_ps(0, a, b);
44009 assert_eq_m128(r, _mm_setzero_ps());
44010 let r = _mm_maskz_sub_ps(0b00001111, a, b);
44011 let e = _mm_set_ps(0., -2., f32::MAX - 1., f32::MIN);
44012 assert_eq_m128(r, e);
44013 }
44014
44015 #[simd_test(enable = "avx512f")]
44016 unsafe fn test_mm512_mullo_epi32() {
44017 #[rustfmt::skip]
44018 let a = _mm512_setr_epi32(
44019 0, 1, -1, i32::MAX,
44020 i32::MIN, 100, -100, -32,
44021 0, 1, -1, i32::MAX,
44022 i32::MIN, 100, -100, -32,
44023 );
44024 let b = _mm512_set1_epi32(2);
44025 let r = _mm512_mullo_epi32(a, b);
44026 let e = _mm512_setr_epi32(
44027 0, 2, -2, -2, 0, 200, -200, -64, 0, 2, -2, -2, 0, 200, -200, -64,
44028 );
44029 assert_eq_m512i(r, e);
44030 }
44031
44032 #[simd_test(enable = "avx512f")]
44033 unsafe fn test_mm512_mask_mullo_epi32() {
44034 #[rustfmt::skip]
44035 let a = _mm512_setr_epi32(
44036 0, 1, -1, i32::MAX,
44037 i32::MIN, 100, -100, -32,
44038 0, 1, -1, i32::MAX,
44039 i32::MIN, 100, -100, -32,
44040 );
44041 let b = _mm512_set1_epi32(2);
44042 let r = _mm512_mask_mullo_epi32(a, 0, a, b);
44043 assert_eq_m512i(r, a);
44044 let r = _mm512_mask_mullo_epi32(a, 0b00000000_11111111, a, b);
44045 #[rustfmt::skip]
44046 let e = _mm512_setr_epi32(
44047 0, 2, -2, -2,
44048 0, 200, -200, -64,
44049 0, 1, -1, i32::MAX,
44050 i32::MIN, 100, -100, -32,
44051 );
44052 assert_eq_m512i(r, e);
44053 }
44054
44055 #[simd_test(enable = "avx512f")]
44056 unsafe fn test_mm512_maskz_mullo_epi32() {
44057 #[rustfmt::skip]
44058 let a = _mm512_setr_epi32(
44059 0, 1, -1, i32::MAX,
44060 i32::MIN, 100, -100, -32,
44061 0, 1, -1, i32::MAX,
44062 i32::MIN, 100, -100, -32,
44063 );
44064 let b = _mm512_set1_epi32(2);
44065 let r = _mm512_maskz_mullo_epi32(0, a, b);
44066 assert_eq_m512i(r, _mm512_setzero_si512());
44067 let r = _mm512_maskz_mullo_epi32(0b00000000_11111111, a, b);
44068 let e = _mm512_setr_epi32(0, 2, -2, -2, 0, 200, -200, -64, 0, 0, 0, 0, 0, 0, 0, 0);
44069 assert_eq_m512i(r, e);
44070 }
44071
44072 #[simd_test(enable = "avx512f,avx512vl")]
44073 unsafe fn test_mm256_mask_mullo_epi32() {
44074 let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
44075 let b = _mm256_set1_epi32(2);
44076 let r = _mm256_mask_mullo_epi32(a, 0, a, b);
44077 assert_eq_m256i(r, a);
44078 let r = _mm256_mask_mullo_epi32(a, 0b11111111, a, b);
44079 let e = _mm256_set_epi32(0, 2, -2, -2, 0, 200, -200, -64);
44080 assert_eq_m256i(r, e);
44081 }
44082
44083 #[simd_test(enable = "avx512f,avx512vl")]
44084 unsafe fn test_mm256_maskz_mullo_epi32() {
44085 let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
44086 let b = _mm256_set1_epi32(2);
44087 let r = _mm256_maskz_mullo_epi32(0, a, b);
44088 assert_eq_m256i(r, _mm256_setzero_si256());
44089 let r = _mm256_maskz_mullo_epi32(0b11111111, a, b);
44090 let e = _mm256_set_epi32(0, 2, -2, -2, 0, 200, -200, -64);
44091 assert_eq_m256i(r, e);
44092 }
44093
44094 #[simd_test(enable = "avx512f,avx512vl")]
44095 unsafe fn test_mm_mask_mullo_epi32() {
44096 let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
44097 let b = _mm_set1_epi32(2);
44098 let r = _mm_mask_mullo_epi32(a, 0, a, b);
44099 assert_eq_m128i(r, a);
44100 let r = _mm_mask_mullo_epi32(a, 0b00001111, a, b);
44101 let e = _mm_set_epi32(2, -2, -2, 0);
44102 assert_eq_m128i(r, e);
44103 }
44104
44105 #[simd_test(enable = "avx512f,avx512vl")]
44106 unsafe fn test_mm_maskz_mullo_epi32() {
44107 let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
44108 let b = _mm_set1_epi32(2);
44109 let r = _mm_maskz_mullo_epi32(0, a, b);
44110 assert_eq_m128i(r, _mm_setzero_si128());
44111 let r = _mm_maskz_mullo_epi32(0b00001111, a, b);
44112 let e = _mm_set_epi32(2, -2, -2, 0);
44113 assert_eq_m128i(r, e);
44114 }
44115
44116 #[simd_test(enable = "avx512f")]
44117 unsafe fn test_mm512_mul_ps() {
44118 #[rustfmt::skip]
44119 let a = _mm512_setr_ps(
44120 0., 1., -1., f32::MAX,
44121 f32::MIN, 100., -100., -32.,
44122 0., 1., -1., f32::MAX,
44123 f32::MIN, 100., -100., -32.,
44124 );
44125 let b = _mm512_set1_ps(2.);
44126 let r = _mm512_mul_ps(a, b);
44127 #[rustfmt::skip]
        let e = _mm512_setr_ps(
            0., 2., -2., f32::INFINITY,
            f32::NEG_INFINITY, 200., -200., -64.,
            0., 2., -2., f32::INFINITY,
            f32::NEG_INFINITY, 200., -200., -64.,
        );
44135 assert_eq_m512(r, e);
44136 }
44137
44138 #[simd_test(enable = "avx512f")]
44139 unsafe fn test_mm512_mask_mul_ps() {
44140 #[rustfmt::skip]
44141 let a = _mm512_setr_ps(
44142 0., 1., -1., f32::MAX,
44143 f32::MIN, 100., -100., -32.,
44144 0., 1., -1., f32::MAX,
44145 f32::MIN, 100., -100., -32.,
44146 );
44147 let b = _mm512_set1_ps(2.);
44148 let r = _mm512_mask_mul_ps(a, 0, a, b);
44149 assert_eq_m512(r, a);
44150 let r = _mm512_mask_mul_ps(a, 0b00000000_11111111, a, b);
44151 #[rustfmt::skip]
44152 let e = _mm512_setr_ps(
44153 0., 2., -2., f32::INFINITY,
44154 f32::NEG_INFINITY, 200., -200., -64.,
44155 0., 1., -1., f32::MAX,
44156 f32::MIN, 100., -100., -32.,
44157 );
44158 assert_eq_m512(r, e);
44159 }
44160
44161 #[simd_test(enable = "avx512f")]
44162 unsafe fn test_mm512_maskz_mul_ps() {
44163 #[rustfmt::skip]
44164 let a = _mm512_setr_ps(
44165 0., 1., -1., f32::MAX,
44166 f32::MIN, 100., -100., -32.,
44167 0., 1., -1., f32::MAX,
44168 f32::MIN, 100., -100., -32.,
44169 );
44170 let b = _mm512_set1_ps(2.);
44171 let r = _mm512_maskz_mul_ps(0, a, b);
44172 assert_eq_m512(r, _mm512_setzero_ps());
44173 let r = _mm512_maskz_mul_ps(0b00000000_11111111, a, b);
44174 #[rustfmt::skip]
44175 let e = _mm512_setr_ps(
44176 0., 2., -2., f32::INFINITY,
44177 f32::NEG_INFINITY, 200., -200., -64.,
44178 0., 0., 0., 0.,
44179 0., 0., 0., 0.,
44180 );
44181 assert_eq_m512(r, e);
44182 }
44183
44184 #[simd_test(enable = "avx512f,avx512vl")]
44185 unsafe fn test_mm256_mask_mul_ps() {
44186 let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
44187 let b = _mm256_set1_ps(2.);
44188 let r = _mm256_mask_mul_ps(a, 0, a, b);
44189 assert_eq_m256(r, a);
44190 let r = _mm256_mask_mul_ps(a, 0b11111111, a, b);
44191 #[rustfmt::skip]
44192 let e = _mm256_set_ps(
44193 0., 2., -2., f32::INFINITY,
44194 f32::NEG_INFINITY, 200., -200., -64.,
44195 );
44196 assert_eq_m256(r, e);
44197 }
44198
44199 #[simd_test(enable = "avx512f,avx512vl")]
44200 unsafe fn test_mm256_maskz_mul_ps() {
44201 let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
44202 let b = _mm256_set1_ps(2.);
44203 let r = _mm256_maskz_mul_ps(0, a, b);
44204 assert_eq_m256(r, _mm256_setzero_ps());
44205 let r = _mm256_maskz_mul_ps(0b11111111, a, b);
44206 #[rustfmt::skip]
44207 let e = _mm256_set_ps(
44208 0., 2., -2., f32::INFINITY,
44209 f32::NEG_INFINITY, 200., -200., -64.,
44210 );
44211 assert_eq_m256(r, e);
44212 }
44213
44214 #[simd_test(enable = "avx512f,avx512vl")]
44215 unsafe fn test_mm_mask_mul_ps() {
44216 let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
44217 let b = _mm_set1_ps(2.);
44218 let r = _mm_mask_mul_ps(a, 0, a, b);
44219 assert_eq_m128(r, a);
44220 let r = _mm_mask_mul_ps(a, 0b00001111, a, b);
44221 let e = _mm_set_ps(2., -2., f32::INFINITY, f32::NEG_INFINITY);
44222 assert_eq_m128(r, e);
44223 }
44224
44225 #[simd_test(enable = "avx512f,avx512vl")]
44226 unsafe fn test_mm_maskz_mul_ps() {
44227 let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
44228 let b = _mm_set1_ps(2.);
44229 let r = _mm_maskz_mul_ps(0, a, b);
44230 assert_eq_m128(r, _mm_setzero_ps());
44231 let r = _mm_maskz_mul_ps(0b00001111, a, b);
44232 let e = _mm_set_ps(2., -2., f32::INFINITY, f32::NEG_INFINITY);
44233 assert_eq_m128(r, e);
44234 }
44235
44236 #[simd_test(enable = "avx512f")]
44237 unsafe fn test_mm512_div_ps() {
44238 let a = _mm512_setr_ps(
44239 0., 1., -1., -2., 100., 100., -100., -32., 0., 1., -1., 1000., -131., 100., -100., -32.,
44240 );
44241 let b = _mm512_setr_ps(
44242 2., 2., 2., 2., 2., 0., 2., 2., 2., 2., 2., 2., 0., 2., 2., 2.,
44243 );
44244 let r = _mm512_div_ps(a, b);
44245 #[rustfmt::skip]
44246 let e = _mm512_setr_ps(
44247 0., 0.5, -0.5, -1.,
44248 50., f32::INFINITY, -50., -16.,
44249 0., 0.5, -0.5, 500.,
44250 f32::NEG_INFINITY, 50., -50., -16.,
44251 );
        assert_eq_m512(r, e); // 100./0. = INFINITY and -131./0. = NEG_INFINITY
44253 }
44254
44255 #[simd_test(enable = "avx512f")]
44256 unsafe fn test_mm512_mask_div_ps() {
44257 let a = _mm512_setr_ps(
44258 0., 1., -1., -2., 100., 100., -100., -32., 0., 1., -1., 1000., -131., 100., -100., -32.,
44259 );
44260 let b = _mm512_setr_ps(
44261 2., 2., 2., 2., 2., 0., 2., 2., 2., 2., 2., 2., 0., 2., 2., 2.,
44262 );
44263 let r = _mm512_mask_div_ps(a, 0, a, b);
44264 assert_eq_m512(r, a);
44265 let r = _mm512_mask_div_ps(a, 0b00000000_11111111, a, b);
44266 #[rustfmt::skip]
44267 let e = _mm512_setr_ps(
44268 0., 0.5, -0.5, -1.,
44269 50., f32::INFINITY, -50., -16.,
44270 0., 1., -1., 1000.,
44271 -131., 100., -100., -32.,
44272 );
44273 assert_eq_m512(r, e);
44274 }
44275
44276 #[simd_test(enable = "avx512f")]
44277 unsafe fn test_mm512_maskz_div_ps() {
44278 let a = _mm512_setr_ps(
44279 0., 1., -1., -2., 100., 100., -100., -32., 0., 1., -1., 1000., -131., 100., -100., -32.,
44280 );
44281 let b = _mm512_setr_ps(
44282 2., 2., 2., 2., 2., 0., 2., 2., 2., 2., 2., 2., 0., 2., 2., 2.,
44283 );
44284 let r = _mm512_maskz_div_ps(0, a, b);
44285 assert_eq_m512(r, _mm512_setzero_ps());
44286 let r = _mm512_maskz_div_ps(0b00000000_11111111, a, b);
44287 #[rustfmt::skip]
44288 let e = _mm512_setr_ps(
44289 0., 0.5, -0.5, -1.,
44290 50., f32::INFINITY, -50., -16.,
44291 0., 0., 0., 0.,
44292 0., 0., 0., 0.,
44293 );
44294 assert_eq_m512(r, e);
44295 }
44296
44297 #[simd_test(enable = "avx512f,avx512vl")]
44298 unsafe fn test_mm256_mask_div_ps() {
44299 let a = _mm256_set_ps(0., 1., -1., -2., 100., 100., -100., -32.);
44300 let b = _mm256_set_ps(2., 2., 2., 2., 2., 0., 2., 2.);
44301 let r = _mm256_mask_div_ps(a, 0, a, b);
44302 assert_eq_m256(r, a);
44303 let r = _mm256_mask_div_ps(a, 0b11111111, a, b);
44304 let e = _mm256_set_ps(0., 0.5, -0.5, -1., 50., f32::INFINITY, -50., -16.);
44305 assert_eq_m256(r, e);
44306 }
44307
44308 #[simd_test(enable = "avx512f,avx512vl")]
44309 unsafe fn test_mm256_maskz_div_ps() {
44310 let a = _mm256_set_ps(0., 1., -1., -2., 100., 100., -100., -32.);
44311 let b = _mm256_set_ps(2., 2., 2., 2., 2., 0., 2., 2.);
44312 let r = _mm256_maskz_div_ps(0, a, b);
44313 assert_eq_m256(r, _mm256_setzero_ps());
44314 let r = _mm256_maskz_div_ps(0b11111111, a, b);
44315 let e = _mm256_set_ps(0., 0.5, -0.5, -1., 50., f32::INFINITY, -50., -16.);
44316 assert_eq_m256(r, e);
44317 }
44318
44319 #[simd_test(enable = "avx512f,avx512vl")]
44320 unsafe fn test_mm_mask_div_ps() {
44321 let a = _mm_set_ps(100., 100., -100., -32.);
44322 let b = _mm_set_ps(2., 0., 2., 2.);
44323 let r = _mm_mask_div_ps(a, 0, a, b);
44324 assert_eq_m128(r, a);
44325 let r = _mm_mask_div_ps(a, 0b00001111, a, b);
44326 let e = _mm_set_ps(50., f32::INFINITY, -50., -16.);
44327 assert_eq_m128(r, e);
44328 }
44329
44330 #[simd_test(enable = "avx512f,avx512vl")]
44331 unsafe fn test_mm_maskz_div_ps() {
44332 let a = _mm_set_ps(100., 100., -100., -32.);
44333 let b = _mm_set_ps(2., 0., 2., 2.);
44334 let r = _mm_maskz_div_ps(0, a, b);
44335 assert_eq_m128(r, _mm_setzero_ps());
44336 let r = _mm_maskz_div_ps(0b00001111, a, b);
44337 let e = _mm_set_ps(50., f32::INFINITY, -50., -16.);
44338 assert_eq_m128(r, e);
44339 }
44340
44341 #[simd_test(enable = "avx512f")]
44342 unsafe fn test_mm512_max_epi32() {
44343 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44344 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44345 let r = _mm512_max_epi32(a, b);
44346 let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
44347 assert_eq_m512i(r, e);
44348 }
44349
44350 #[simd_test(enable = "avx512f")]
44351 unsafe fn test_mm512_mask_max_epi32() {
44352 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44353 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44354 let r = _mm512_mask_max_epi32(a, 0, a, b);
44355 assert_eq_m512i(r, a);
44356 let r = _mm512_mask_max_epi32(a, 0b00000000_11111111, a, b);
44357 let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
44358 assert_eq_m512i(r, e);
44359 }
44360
44361 #[simd_test(enable = "avx512f")]
44362 unsafe fn test_mm512_maskz_max_epi32() {
44363 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44364 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44365 let r = _mm512_maskz_max_epi32(0, a, b);
44366 assert_eq_m512i(r, _mm512_setzero_si512());
44367 let r = _mm512_maskz_max_epi32(0b00000000_11111111, a, b);
44368 let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 0, 0, 0, 0, 0, 0, 0, 0);
44369 assert_eq_m512i(r, e);
44370 }
44371
44372 #[simd_test(enable = "avx512f,avx512vl")]
44373 unsafe fn test_mm256_mask_max_epi32() {
44374 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44375 let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44376 let r = _mm256_mask_max_epi32(a, 0, a, b);
44377 assert_eq_m256i(r, a);
44378 let r = _mm256_mask_max_epi32(a, 0b11111111, a, b);
44379 let e = _mm256_set_epi32(7, 6, 5, 4, 4, 5, 6, 7);
44380 assert_eq_m256i(r, e);
44381 }
44382
44383 #[simd_test(enable = "avx512f,avx512vl")]
44384 unsafe fn test_mm256_maskz_max_epi32() {
44385 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44386 let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44387 let r = _mm256_maskz_max_epi32(0, a, b);
44388 assert_eq_m256i(r, _mm256_setzero_si256());
44389 let r = _mm256_maskz_max_epi32(0b11111111, a, b);
44390 let e = _mm256_set_epi32(7, 6, 5, 4, 4, 5, 6, 7);
44391 assert_eq_m256i(r, e);
44392 }
44393
44394 #[simd_test(enable = "avx512f,avx512vl")]
44395 unsafe fn test_mm_mask_max_epi32() {
44396 let a = _mm_set_epi32(0, 1, 2, 3);
44397 let b = _mm_set_epi32(3, 2, 1, 0);
44398 let r = _mm_mask_max_epi32(a, 0, a, b);
44399 assert_eq_m128i(r, a);
44400 let r = _mm_mask_max_epi32(a, 0b00001111, a, b);
44401 let e = _mm_set_epi32(3, 2, 2, 3);
44402 assert_eq_m128i(r, e);
44403 }
44404
44405 #[simd_test(enable = "avx512f,avx512vl")]
44406 unsafe fn test_mm_maskz_max_epi32() {
44407 let a = _mm_set_epi32(0, 1, 2, 3);
44408 let b = _mm_set_epi32(3, 2, 1, 0);
44409 let r = _mm_maskz_max_epi32(0, a, b);
44410 assert_eq_m128i(r, _mm_setzero_si128());
44411 let r = _mm_maskz_max_epi32(0b00001111, a, b);
44412 let e = _mm_set_epi32(3, 2, 2, 3);
44413 assert_eq_m128i(r, e);
44414 }
44415
44416 #[simd_test(enable = "avx512f")]
44417 unsafe fn test_mm512_max_ps() {
44418 let a = _mm512_setr_ps(
44419 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44420 );
44421 let b = _mm512_setr_ps(
44422 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
44423 );
44424 let r = _mm512_max_ps(a, b);
44425 let e = _mm512_setr_ps(
44426 15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
44427 );
44428 assert_eq_m512(r, e);
44429 }
44430
44431 #[simd_test(enable = "avx512f")]
44432 unsafe fn test_mm512_mask_max_ps() {
44433 let a = _mm512_setr_ps(
44434 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44435 );
44436 let b = _mm512_setr_ps(
44437 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
44438 );
44439 let r = _mm512_mask_max_ps(a, 0, a, b);
44440 assert_eq_m512(r, a);
44441 let r = _mm512_mask_max_ps(a, 0b00000000_11111111, a, b);
44442 let e = _mm512_setr_ps(
44443 15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
44444 );
44445 assert_eq_m512(r, e);
44446 }
44447
44448 #[simd_test(enable = "avx512f")]
44449 unsafe fn test_mm512_maskz_max_ps() {
44450 let a = _mm512_setr_ps(
44451 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44452 );
44453 let b = _mm512_setr_ps(
44454 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
44455 );
44456 let r = _mm512_maskz_max_ps(0, a, b);
44457 assert_eq_m512(r, _mm512_setzero_ps());
44458 let r = _mm512_maskz_max_ps(0b00000000_11111111, a, b);
44459 let e = _mm512_setr_ps(
44460 15., 14., 13., 12., 11., 10., 9., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
44461 );
44462 assert_eq_m512(r, e);
44463 }
44464
44465 #[simd_test(enable = "avx512f,avx512vl")]
44466 unsafe fn test_mm256_mask_max_ps() {
44467 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
44468 let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.);
44469 let r = _mm256_mask_max_ps(a, 0, a, b);
44470 assert_eq_m256(r, a);
44471 let r = _mm256_mask_max_ps(a, 0b11111111, a, b);
44472 let e = _mm256_set_ps(7., 6., 5., 4., 4., 5., 6., 7.);
44473 assert_eq_m256(r, e);
44474 }
44475
44476 #[simd_test(enable = "avx512f,avx512vl")]
44477 unsafe fn test_mm256_maskz_max_ps() {
44478 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
44479 let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.);
44480 let r = _mm256_maskz_max_ps(0, a, b);
44481 assert_eq_m256(r, _mm256_setzero_ps());
44482 let r = _mm256_maskz_max_ps(0b11111111, a, b);
44483 let e = _mm256_set_ps(7., 6., 5., 4., 4., 5., 6., 7.);
44484 assert_eq_m256(r, e);
44485 }
44486
44487 #[simd_test(enable = "avx512f,avx512vl")]
44488 unsafe fn test_mm_mask_max_ps() {
44489 let a = _mm_set_ps(0., 1., 2., 3.);
44490 let b = _mm_set_ps(3., 2., 1., 0.);
44491 let r = _mm_mask_max_ps(a, 0, a, b);
44492 assert_eq_m128(r, a);
44493 let r = _mm_mask_max_ps(a, 0b00001111, a, b);
44494 let e = _mm_set_ps(3., 2., 2., 3.);
44495 assert_eq_m128(r, e);
44496 }
44497
    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_max_ps() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set_ps(3., 2., 1., 0.);
        let r = _mm_maskz_max_ps(0, a, b);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_max_ps(0b00001111, a, b);
        let e = _mm_set_ps(3., 2., 2., 3.);
        assert_eq_m128(r, e);
    }
44508
44509 #[simd_test(enable = "avx512f")]
44510 unsafe fn test_mm512_max_epu32() {
44511 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44512 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44513 let r = _mm512_max_epu32(a, b);
44514 let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
44515 assert_eq_m512i(r, e);
44516 }
44517
44518 #[simd_test(enable = "avx512f")]
44519 unsafe fn test_mm512_mask_max_epu32() {
44520 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44521 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44522 let r = _mm512_mask_max_epu32(a, 0, a, b);
44523 assert_eq_m512i(r, a);
44524 let r = _mm512_mask_max_epu32(a, 0b00000000_11111111, a, b);
44525 let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
44526 assert_eq_m512i(r, e);
44527 }
44528
44529 #[simd_test(enable = "avx512f")]
44530 unsafe fn test_mm512_maskz_max_epu32() {
44531 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44532 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44533 let r = _mm512_maskz_max_epu32(0, a, b);
44534 assert_eq_m512i(r, _mm512_setzero_si512());
44535 let r = _mm512_maskz_max_epu32(0b00000000_11111111, a, b);
44536 let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 0, 0, 0, 0, 0, 0, 0, 0);
44537 assert_eq_m512i(r, e);
44538 }
44539
44540 #[simd_test(enable = "avx512f,avx512vl")]
44541 unsafe fn test_mm256_mask_max_epu32() {
44542 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44543 let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44544 let r = _mm256_mask_max_epu32(a, 0, a, b);
44545 assert_eq_m256i(r, a);
44546 let r = _mm256_mask_max_epu32(a, 0b11111111, a, b);
44547 let e = _mm256_set_epi32(7, 6, 5, 4, 4, 5, 6, 7);
44548 assert_eq_m256i(r, e);
44549 }
44550
44551 #[simd_test(enable = "avx512f,avx512vl")]
44552 unsafe fn test_mm256_maskz_max_epu32() {
44553 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44554 let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44555 let r = _mm256_maskz_max_epu32(0, a, b);
44556 assert_eq_m256i(r, _mm256_setzero_si256());
44557 let r = _mm256_maskz_max_epu32(0b11111111, a, b);
44558 let e = _mm256_set_epi32(7, 6, 5, 4, 4, 5, 6, 7);
44559 assert_eq_m256i(r, e);
44560 }
44561
44562 #[simd_test(enable = "avx512f,avx512vl")]
44563 unsafe fn test_mm_mask_max_epu32() {
44564 let a = _mm_set_epi32(0, 1, 2, 3);
44565 let b = _mm_set_epi32(3, 2, 1, 0);
44566 let r = _mm_mask_max_epu32(a, 0, a, b);
44567 assert_eq_m128i(r, a);
44568 let r = _mm_mask_max_epu32(a, 0b00001111, a, b);
44569 let e = _mm_set_epi32(3, 2, 2, 3);
44570 assert_eq_m128i(r, e);
44571 }
44572
44573 #[simd_test(enable = "avx512f,avx512vl")]
44574 unsafe fn test_mm_maskz_max_epu32() {
44575 let a = _mm_set_epi32(0, 1, 2, 3);
44576 let b = _mm_set_epi32(3, 2, 1, 0);
44577 let r = _mm_maskz_max_epu32(0, a, b);
44578 assert_eq_m128i(r, _mm_setzero_si128());
44579 let r = _mm_maskz_max_epu32(0b00001111, a, b);
44580 let e = _mm_set_epi32(3, 2, 2, 3);
44581 assert_eq_m128i(r, e);
44582 }
44583
44584 #[simd_test(enable = "avx512f")]
44585 unsafe fn test_mm512_min_epi32() {
44586 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44587 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44588 let r = _mm512_min_epi32(a, b);
44589 let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
44590 assert_eq_m512i(r, e);
44591 }
44592
44593 #[simd_test(enable = "avx512f")]
44594 unsafe fn test_mm512_mask_min_epi32() {
44595 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44596 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44597 let r = _mm512_mask_min_epi32(a, 0, a, b);
44598 assert_eq_m512i(r, a);
44599 let r = _mm512_mask_min_epi32(a, 0b00000000_11111111, a, b);
44600 let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44601 assert_eq_m512i(r, e);
44602 }
44603
44604 #[simd_test(enable = "avx512f")]
44605 unsafe fn test_mm512_maskz_min_epi32() {
44606 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44607 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44608 let r = _mm512_maskz_min_epi32(0, a, b);
44609 assert_eq_m512i(r, _mm512_setzero_si512());
44610 let r = _mm512_maskz_min_epi32(0b00000000_11111111, a, b);
44611 let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0);
44612 assert_eq_m512i(r, e);
44613 }
44614
44615 #[simd_test(enable = "avx512f,avx512vl")]
44616 unsafe fn test_mm256_mask_min_epi32() {
44617 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44618 let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44619 let r = _mm256_mask_min_epi32(a, 0, a, b);
44620 assert_eq_m256i(r, a);
44621 let r = _mm256_mask_min_epi32(a, 0b11111111, a, b);
44622 let e = _mm256_set_epi32(0, 1, 2, 3, 3, 2, 1, 0);
44623 assert_eq_m256i(r, e);
44624 }
44625
44626 #[simd_test(enable = "avx512f,avx512vl")]
44627 unsafe fn test_mm256_maskz_min_epi32() {
44628 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44629 let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44630 let r = _mm256_maskz_min_epi32(0, a, b);
44631 assert_eq_m256i(r, _mm256_setzero_si256());
44632 let r = _mm256_maskz_min_epi32(0b11111111, a, b);
44633 let e = _mm256_set_epi32(0, 1, 2, 3, 3, 2, 1, 0);
44634 assert_eq_m256i(r, e);
44635 }
44636
44637 #[simd_test(enable = "avx512f,avx512vl")]
44638 unsafe fn test_mm_mask_min_epi32() {
44639 let a = _mm_set_epi32(0, 1, 2, 3);
44640 let b = _mm_set_epi32(3, 2, 1, 0);
44641 let r = _mm_mask_min_epi32(a, 0, a, b);
44642 assert_eq_m128i(r, a);
44643 let r = _mm_mask_min_epi32(a, 0b00001111, a, b);
44644 let e = _mm_set_epi32(0, 1, 1, 0);
44645 assert_eq_m128i(r, e);
44646 }
44647
44648 #[simd_test(enable = "avx512f,avx512vl")]
44649 unsafe fn test_mm_maskz_min_epi32() {
44650 let a = _mm_set_epi32(0, 1, 2, 3);
44651 let b = _mm_set_epi32(3, 2, 1, 0);
44652 let r = _mm_maskz_min_epi32(0, a, b);
44653 assert_eq_m128i(r, _mm_setzero_si128());
44654 let r = _mm_maskz_min_epi32(0b00001111, a, b);
44655 let e = _mm_set_epi32(0, 1, 1, 0);
44656 assert_eq_m128i(r, e);
44657 }
44658
44659 #[simd_test(enable = "avx512f")]
44660 unsafe fn test_mm512_min_ps() {
44661 let a = _mm512_setr_ps(
44662 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44663 );
44664 let b = _mm512_setr_ps(
44665 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
44666 );
44667 let r = _mm512_min_ps(a, b);
44668 let e = _mm512_setr_ps(
44669 0., 1., 2., 3., 4., 5., 6., 7., 7., 6., 5., 4., 3., 2., 1., 0.,
44670 );
44671 assert_eq_m512(r, e);
44672 }
44673
44674 #[simd_test(enable = "avx512f")]
44675 unsafe fn test_mm512_mask_min_ps() {
44676 let a = _mm512_setr_ps(
44677 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44678 );
44679 let b = _mm512_setr_ps(
44680 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
44681 );
44682 let r = _mm512_mask_min_ps(a, 0, a, b);
44683 assert_eq_m512(r, a);
44684 let r = _mm512_mask_min_ps(a, 0b00000000_11111111, a, b);
44685 let e = _mm512_setr_ps(
44686 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44687 );
44688 assert_eq_m512(r, e);
44689 }
44690
44691 #[simd_test(enable = "avx512f")]
44692 unsafe fn test_mm512_maskz_min_ps() {
44693 let a = _mm512_setr_ps(
44694 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44695 );
44696 let b = _mm512_setr_ps(
44697 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
44698 );
44699 let r = _mm512_maskz_min_ps(0, a, b);
44700 assert_eq_m512(r, _mm512_setzero_ps());
44701 let r = _mm512_maskz_min_ps(0b00000000_11111111, a, b);
44702 let e = _mm512_setr_ps(
44703 0., 1., 2., 3., 4., 5., 6., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
44704 );
44705 assert_eq_m512(r, e);
44706 }
44707
44708 #[simd_test(enable = "avx512f,avx512vl")]
44709 unsafe fn test_mm256_mask_min_ps() {
44710 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
44711 let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.);
44712 let r = _mm256_mask_min_ps(a, 0, a, b);
44713 assert_eq_m256(r, a);
44714 let r = _mm256_mask_min_ps(a, 0b11111111, a, b);
44715 let e = _mm256_set_ps(0., 1., 2., 3., 3., 2., 1., 0.);
44716 assert_eq_m256(r, e);
44717 }
44718
44719 #[simd_test(enable = "avx512f,avx512vl")]
44720 unsafe fn test_mm256_maskz_min_ps() {
44721 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
44722 let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.);
44723 let r = _mm256_maskz_min_ps(0, a, b);
44724 assert_eq_m256(r, _mm256_setzero_ps());
44725 let r = _mm256_maskz_min_ps(0b11111111, a, b);
44726 let e = _mm256_set_ps(0., 1., 2., 3., 3., 2., 1., 0.);
44727 assert_eq_m256(r, e);
44728 }
44729
44730 #[simd_test(enable = "avx512f,avx512vl")]
44731 unsafe fn test_mm_mask_min_ps() {
44732 let a = _mm_set_ps(0., 1., 2., 3.);
44733 let b = _mm_set_ps(3., 2., 1., 0.);
44734 let r = _mm_mask_min_ps(a, 0, a, b);
44735 assert_eq_m128(r, a);
44736 let r = _mm_mask_min_ps(a, 0b00001111, a, b);
44737 let e = _mm_set_ps(0., 1., 1., 0.);
44738 assert_eq_m128(r, e);
44739 }
44740
44741 #[simd_test(enable = "avx512f,avx512vl")]
44742 unsafe fn test_mm_maskz_min_ps() {
44743 let a = _mm_set_ps(0., 1., 2., 3.);
44744 let b = _mm_set_ps(3., 2., 1., 0.);
44745 let r = _mm_maskz_min_ps(0, a, b);
44746 assert_eq_m128(r, _mm_setzero_ps());
44747 let r = _mm_maskz_min_ps(0b00001111, a, b);
44748 let e = _mm_set_ps(0., 1., 1., 0.);
44749 assert_eq_m128(r, e);
44750 }
44751
44752 #[simd_test(enable = "avx512f")]
44753 unsafe fn test_mm512_min_epu32() {
44754 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44755 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44756 let r = _mm512_min_epu32(a, b);
44757 let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
44758 assert_eq_m512i(r, e);
44759 }
44760
44761 #[simd_test(enable = "avx512f")]
44762 unsafe fn test_mm512_mask_min_epu32() {
44763 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44764 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44765 let r = _mm512_mask_min_epu32(a, 0, a, b);
44766 assert_eq_m512i(r, a);
44767 let r = _mm512_mask_min_epu32(a, 0b00000000_11111111, a, b);
44768 let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44769 assert_eq_m512i(r, e);
44770 }
44771
44772 #[simd_test(enable = "avx512f")]
44773 unsafe fn test_mm512_maskz_min_epu32() {
44774 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44775 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44776 let r = _mm512_maskz_min_epu32(0, a, b);
44777 assert_eq_m512i(r, _mm512_setzero_si512());
44778 let r = _mm512_maskz_min_epu32(0b00000000_11111111, a, b);
44779 let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0);
44780 assert_eq_m512i(r, e);
44781 }
44782
44783 #[simd_test(enable = "avx512f,avx512vl")]
44784 unsafe fn test_mm256_mask_min_epu32() {
44785 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44786 let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44787 let r = _mm256_mask_min_epu32(a, 0, a, b);
44788 assert_eq_m256i(r, a);
44789 let r = _mm256_mask_min_epu32(a, 0b11111111, a, b);
44790 let e = _mm256_set_epi32(0, 1, 2, 3, 3, 2, 1, 0);
44791 assert_eq_m256i(r, e);
44792 }
44793
44794 #[simd_test(enable = "avx512f,avx512vl")]
44795 unsafe fn test_mm256_maskz_min_epu32() {
44796 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44797 let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44798 let r = _mm256_maskz_min_epu32(0, a, b);
44799 assert_eq_m256i(r, _mm256_setzero_si256());
44800 let r = _mm256_maskz_min_epu32(0b11111111, a, b);
44801 let e = _mm256_set_epi32(0, 1, 2, 3, 3, 2, 1, 0);
44802 assert_eq_m256i(r, e);
44803 }
44804
44805 #[simd_test(enable = "avx512f,avx512vl")]
44806 unsafe fn test_mm_mask_min_epu32() {
44807 let a = _mm_set_epi32(0, 1, 2, 3);
44808 let b = _mm_set_epi32(3, 2, 1, 0);
44809 let r = _mm_mask_min_epu32(a, 0, a, b);
44810 assert_eq_m128i(r, a);
44811 let r = _mm_mask_min_epu32(a, 0b00001111, a, b);
44812 let e = _mm_set_epi32(0, 1, 1, 0);
44813 assert_eq_m128i(r, e);
44814 }
44815
44816 #[simd_test(enable = "avx512f,avx512vl")]
44817 unsafe fn test_mm_maskz_min_epu32() {
44818 let a = _mm_set_epi32(0, 1, 2, 3);
44819 let b = _mm_set_epi32(3, 2, 1, 0);
44820 let r = _mm_maskz_min_epu32(0, a, b);
44821 assert_eq_m128i(r, _mm_setzero_si128());
44822 let r = _mm_maskz_min_epu32(0b00001111, a, b);
44823 let e = _mm_set_epi32(0, 1, 1, 0);
44824 assert_eq_m128i(r, e);
44825 }
44826
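    // The sqrt tests use perfect squares (0, 1, 4, 9, ...) so `vsqrtps` produces
    // exact results and the expected vectors can be compared bit-for-bit.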
44827 #[simd_test(enable = "avx512f")]
44828 unsafe fn test_mm512_sqrt_ps() {
44829 let a = _mm512_setr_ps(
44830 0., 1., 4., 9., 16., 25., 36., 49., 64., 81., 100., 121., 144., 169., 196., 225.,
44831 );
44832 let r = _mm512_sqrt_ps(a);
44833 let e = _mm512_setr_ps(
44834 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44835 );
44836 assert_eq_m512(r, e);
44837 }
44838
44839 #[simd_test(enable = "avx512f")]
44840 unsafe fn test_mm512_mask_sqrt_ps() {
44841 let a = _mm512_setr_ps(
44842 0., 1., 4., 9., 16., 25., 36., 49., 64., 81., 100., 121., 144., 169., 196., 225.,
44843 );
44844 let r = _mm512_mask_sqrt_ps(a, 0, a);
44845 assert_eq_m512(r, a);
44846 let r = _mm512_mask_sqrt_ps(a, 0b00000000_11111111, a);
44847 let e = _mm512_setr_ps(
44848 0., 1., 2., 3., 4., 5., 6., 7., 64., 81., 100., 121., 144., 169., 196., 225.,
44849 );
44850 assert_eq_m512(r, e);
44851 }
44852
44853 #[simd_test(enable = "avx512f")]
44854 unsafe fn test_mm512_maskz_sqrt_ps() {
44855 let a = _mm512_setr_ps(
44856 0., 1., 4., 9., 16., 25., 36., 49., 64., 81., 100., 121., 144., 169., 196., 225.,
44857 );
44858 let r = _mm512_maskz_sqrt_ps(0, a);
44859 assert_eq_m512(r, _mm512_setzero_ps());
44860 let r = _mm512_maskz_sqrt_ps(0b00000000_11111111, a);
44861 let e = _mm512_setr_ps(
44862 0., 1., 2., 3., 4., 5., 6., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
44863 );
44864 assert_eq_m512(r, e);
44865 }
44866
44867 #[simd_test(enable = "avx512f,avx512vl")]
44868 unsafe fn test_mm256_mask_sqrt_ps() {
44869 let a = _mm256_set_ps(0., 1., 4., 9., 16., 25., 36., 49.);
44870 let r = _mm256_mask_sqrt_ps(a, 0, a);
44871 assert_eq_m256(r, a);
44872 let r = _mm256_mask_sqrt_ps(a, 0b11111111, a);
44873 let e = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
44874 assert_eq_m256(r, e);
44875 }
44876
44877 #[simd_test(enable = "avx512f,avx512vl")]
44878 unsafe fn test_mm256_maskz_sqrt_ps() {
44879 let a = _mm256_set_ps(0., 1., 4., 9., 16., 25., 36., 49.);
44880 let r = _mm256_maskz_sqrt_ps(0, a);
44881 assert_eq_m256(r, _mm256_setzero_ps());
44882 let r = _mm256_maskz_sqrt_ps(0b11111111, a);
44883 let e = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
44884 assert_eq_m256(r, e);
44885 }
44886
44887 #[simd_test(enable = "avx512f,avx512vl")]
44888 unsafe fn test_mm_mask_sqrt_ps() {
44889 let a = _mm_set_ps(0., 1., 4., 9.);
44890 let r = _mm_mask_sqrt_ps(a, 0, a);
44891 assert_eq_m128(r, a);
44892 let r = _mm_mask_sqrt_ps(a, 0b00001111, a);
44893 let e = _mm_set_ps(0., 1., 2., 3.);
44894 assert_eq_m128(r, e);
44895 }
44896
44897 #[simd_test(enable = "avx512f,avx512vl")]
44898 unsafe fn test_mm_maskz_sqrt_ps() {
44899 let a = _mm_set_ps(0., 1., 4., 9.);
44900 let r = _mm_maskz_sqrt_ps(0, a);
44901 assert_eq_m128(r, _mm_setzero_ps());
44902 let r = _mm_maskz_sqrt_ps(0b00001111, a);
44903 let e = _mm_set_ps(0., 1., 2., 3.);
44904 assert_eq_m128(r, e);
44905 }
44906
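    // The FMA tests use a = c = 1, so each fused-multiply-add lane is simply
    // b[i] + 1 (computed with a single rounding). The `mask3` variants take the
    // mask last and fall back to `c` rather than `a` for unselected lanes, which
    // is why `test_mm512_mask3_fmadd_ps` uses a distinct c = 2 to make the
    // copied lanes visible.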
44907 #[simd_test(enable = "avx512f")]
44908 unsafe fn test_mm512_fmadd_ps() {
44909 let a = _mm512_set1_ps(1.);
44910 let b = _mm512_setr_ps(
44911 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44912 );
44913 let c = _mm512_set1_ps(1.);
44914 let r = _mm512_fmadd_ps(a, b, c);
44915 let e = _mm512_setr_ps(
44916 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
44917 );
44918 assert_eq_m512(r, e);
44919 }
44920
44921 #[simd_test(enable = "avx512f")]
44922 unsafe fn test_mm512_mask_fmadd_ps() {
44923 let a = _mm512_set1_ps(1.);
44924 let b = _mm512_setr_ps(
44925 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44926 );
44927 let c = _mm512_set1_ps(1.);
44928 let r = _mm512_mask_fmadd_ps(a, 0, b, c);
44929 assert_eq_m512(r, a);
44930 let r = _mm512_mask_fmadd_ps(a, 0b00000000_11111111, b, c);
44931 let e = _mm512_setr_ps(
44932 1., 2., 3., 4., 5., 6., 7., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
44933 );
44934 assert_eq_m512(r, e);
44935 }
44936
44937 #[simd_test(enable = "avx512f")]
44938 unsafe fn test_mm512_maskz_fmadd_ps() {
44939 let a = _mm512_set1_ps(1.);
44940 let b = _mm512_setr_ps(
44941 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44942 );
44943 let c = _mm512_set1_ps(1.);
44944 let r = _mm512_maskz_fmadd_ps(0, a, b, c);
44945 assert_eq_m512(r, _mm512_setzero_ps());
44946 let r = _mm512_maskz_fmadd_ps(0b00000000_11111111, a, b, c);
44947 let e = _mm512_setr_ps(
44948 1., 2., 3., 4., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
44949 );
44950 assert_eq_m512(r, e);
44951 }
44952
44953 #[simd_test(enable = "avx512f")]
44954 unsafe fn test_mm512_mask3_fmadd_ps() {
44955 let a = _mm512_set1_ps(1.);
44956 let b = _mm512_setr_ps(
44957 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44958 );
44959 let c = _mm512_set1_ps(2.);
44960 let r = _mm512_mask3_fmadd_ps(a, b, c, 0);
44961 assert_eq_m512(r, c);
44962 let r = _mm512_mask3_fmadd_ps(a, b, c, 0b00000000_11111111);
44963 let e = _mm512_setr_ps(
44964 2., 3., 4., 5., 6., 7., 8., 9., 2., 2., 2., 2., 2., 2., 2., 2.,
44965 );
44966 assert_eq_m512(r, e);
44967 }
44968
44969 #[simd_test(enable = "avx512f,avx512vl")]
44970 unsafe fn test_mm256_mask_fmadd_ps() {
44971 let a = _mm256_set1_ps(1.);
44972 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
44973 let c = _mm256_set1_ps(1.);
44974 let r = _mm256_mask_fmadd_ps(a, 0, b, c);
44975 assert_eq_m256(r, a);
44976 let r = _mm256_mask_fmadd_ps(a, 0b11111111, b, c);
44977 let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
44978 assert_eq_m256(r, e);
44979 }
44980
44981 #[simd_test(enable = "avx512f,avx512vl")]
44982 unsafe fn test_mm256_maskz_fmadd_ps() {
44983 let a = _mm256_set1_ps(1.);
44984 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
44985 let c = _mm256_set1_ps(1.);
44986 let r = _mm256_maskz_fmadd_ps(0, a, b, c);
44987 assert_eq_m256(r, _mm256_setzero_ps());
44988 let r = _mm256_maskz_fmadd_ps(0b11111111, a, b, c);
44989 let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
44990 assert_eq_m256(r, e);
44991 }
44992
44993 #[simd_test(enable = "avx512f,avx512vl")]
44994 unsafe fn test_mm256_mask3_fmadd_ps() {
44995 let a = _mm256_set1_ps(1.);
44996 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
44997 let c = _mm256_set1_ps(1.);
44998 let r = _mm256_mask3_fmadd_ps(a, b, c, 0);
44999 assert_eq_m256(r, c);
45000 let r = _mm256_mask3_fmadd_ps(a, b, c, 0b11111111);
45001 let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
45002 assert_eq_m256(r, e);
45003 }
45004
45005 #[simd_test(enable = "avx512f,avx512vl")]
45006 unsafe fn test_mm_mask_fmadd_ps() {
45007 let a = _mm_set1_ps(1.);
45008 let b = _mm_set_ps(0., 1., 2., 3.);
45009 let c = _mm_set1_ps(1.);
45010 let r = _mm_mask_fmadd_ps(a, 0, b, c);
45011 assert_eq_m128(r, a);
45012 let r = _mm_mask_fmadd_ps(a, 0b00001111, b, c);
45013 let e = _mm_set_ps(1., 2., 3., 4.);
45014 assert_eq_m128(r, e);
45015 }
45016
45017 #[simd_test(enable = "avx512f,avx512vl")]
45018 unsafe fn test_mm_maskz_fmadd_ps() {
45019 let a = _mm_set1_ps(1.);
45020 let b = _mm_set_ps(0., 1., 2., 3.);
45021 let c = _mm_set1_ps(1.);
45022 let r = _mm_maskz_fmadd_ps(0, a, b, c);
45023 assert_eq_m128(r, _mm_setzero_ps());
45024 let r = _mm_maskz_fmadd_ps(0b00001111, a, b, c);
45025 let e = _mm_set_ps(1., 2., 3., 4.);
45026 assert_eq_m128(r, e);
45027 }
45028
45029 #[simd_test(enable = "avx512f,avx512vl")]
45030 unsafe fn test_mm_mask3_fmadd_ps() {
45031 let a = _mm_set1_ps(1.);
45032 let b = _mm_set_ps(0., 1., 2., 3.);
45033 let c = _mm_set1_ps(1.);
45034 let r = _mm_mask3_fmadd_ps(a, b, c, 0);
45035 assert_eq_m128(r, c);
45036 let r = _mm_mask3_fmadd_ps(a, b, c, 0b00001111);
45037 let e = _mm_set_ps(1., 2., 3., 4.);
45038 assert_eq_m128(r, e);
45039 }
45040
45041 #[simd_test(enable = "avx512f")]
45042 unsafe fn test_mm512_fmsub_ps() {
45043 let a = _mm512_setr_ps(
45044 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
45045 );
45046 let b = _mm512_setr_ps(
45047 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45048 );
45049 let c = _mm512_setr_ps(
45050 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
45051 );
45052 let r = _mm512_fmsub_ps(a, b, c);
45053 let e = _mm512_setr_ps(
45054 -1., 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14.,
45055 );
45056 assert_eq_m512(r, e);
45057 }
45058
45059 #[simd_test(enable = "avx512f")]
45060 unsafe fn test_mm512_mask_fmsub_ps() {
45061 let a = _mm512_set1_ps(1.);
45062 let b = _mm512_setr_ps(
45063 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45064 );
45065 let c = _mm512_set1_ps(1.);
45066 let r = _mm512_mask_fmsub_ps(a, 0, b, c);
45067 assert_eq_m512(r, a);
45068 let r = _mm512_mask_fmsub_ps(a, 0b00000000_11111111, b, c);
45069 let e = _mm512_setr_ps(
45070 -1., 0., 1., 2., 3., 4., 5., 6., 1., 1., 1., 1., 1., 1., 1., 1.,
45071 );
45072 assert_eq_m512(r, e);
45073 }
45074
45075 #[simd_test(enable = "avx512f")]
45076 unsafe fn test_mm512_maskz_fmsub_ps() {
45077 let a = _mm512_set1_ps(1.);
45078 let b = _mm512_setr_ps(
45079 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45080 );
45081 let c = _mm512_set1_ps(1.);
45082 let r = _mm512_maskz_fmsub_ps(0, a, b, c);
45083 assert_eq_m512(r, _mm512_setzero_ps());
45084 let r = _mm512_maskz_fmsub_ps(0b00000000_11111111, a, b, c);
45085 let e = _mm512_setr_ps(
45086 -1., 0., 1., 2., 3., 4., 5., 6., 0., 0., 0., 0., 0., 0., 0., 0.,
45087 );
45088 assert_eq_m512(r, e);
45089 }
45090
45091 #[simd_test(enable = "avx512f")]
45092 unsafe fn test_mm512_mask3_fmsub_ps() {
45093 let a = _mm512_set1_ps(1.);
45094 let b = _mm512_setr_ps(
45095 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45096 );
45097 let c = _mm512_setr_ps(
45098 1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
45099 );
45100 let r = _mm512_mask3_fmsub_ps(a, b, c, 0);
45101 assert_eq_m512(r, c);
45102 let r = _mm512_mask3_fmsub_ps(a, b, c, 0b00000000_11111111);
45103 let e = _mm512_setr_ps(
45104 -1., 0., 1., 2., 3., 4., 5., 6., 2., 2., 2., 2., 2., 2., 2., 2.,
45105 );
45106 assert_eq_m512(r, e);
45107 }
45108
45109 #[simd_test(enable = "avx512f,avx512vl")]
45110 unsafe fn test_mm256_mask_fmsub_ps() {
45111 let a = _mm256_set1_ps(1.);
45112 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45113 let c = _mm256_set1_ps(1.);
45114 let r = _mm256_mask_fmsub_ps(a, 0, b, c);
45115 assert_eq_m256(r, a);
45116 let r = _mm256_mask_fmsub_ps(a, 0b11111111, b, c);
45117 let e = _mm256_set_ps(-1., 0., 1., 2., 3., 4., 5., 6.);
45118 assert_eq_m256(r, e);
45119 }
45120
45121 #[simd_test(enable = "avx512f,avx512vl")]
45122 unsafe fn test_mm256_maskz_fmsub_ps() {
45123 let a = _mm256_set1_ps(1.);
45124 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45125 let c = _mm256_set1_ps(1.);
45126 let r = _mm256_maskz_fmsub_ps(0, a, b, c);
45127 assert_eq_m256(r, _mm256_setzero_ps());
45128 let r = _mm256_maskz_fmsub_ps(0b11111111, a, b, c);
45129 let e = _mm256_set_ps(-1., 0., 1., 2., 3., 4., 5., 6.);
45130 assert_eq_m256(r, e);
45131 }
45132
45133 #[simd_test(enable = "avx512f,avx512vl")]
45134 unsafe fn test_mm256_mask3_fmsub_ps() {
45135 let a = _mm256_set1_ps(1.);
45136 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45137 let c = _mm256_set1_ps(1.);
45138 let r = _mm256_mask3_fmsub_ps(a, b, c, 0);
45139 assert_eq_m256(r, c);
45140 let r = _mm256_mask3_fmsub_ps(a, b, c, 0b11111111);
45141 let e = _mm256_set_ps(-1., 0., 1., 2., 3., 4., 5., 6.);
45142 assert_eq_m256(r, e);
45143 }
45144
45145 #[simd_test(enable = "avx512f,avx512vl")]
45146 unsafe fn test_mm_mask_fmsub_ps() {
45147 let a = _mm_set1_ps(1.);
45148 let b = _mm_set_ps(0., 1., 2., 3.);
45149 let c = _mm_set1_ps(1.);
45150 let r = _mm_mask_fmsub_ps(a, 0, b, c);
45151 assert_eq_m128(r, a);
45152 let r = _mm_mask_fmsub_ps(a, 0b00001111, b, c);
45153 let e = _mm_set_ps(-1., 0., 1., 2.);
45154 assert_eq_m128(r, e);
45155 }
45156
45157 #[simd_test(enable = "avx512f,avx512vl")]
45158 unsafe fn test_mm_maskz_fmsub_ps() {
45159 let a = _mm_set1_ps(1.);
45160 let b = _mm_set_ps(0., 1., 2., 3.);
45161 let c = _mm_set1_ps(1.);
45162 let r = _mm_maskz_fmsub_ps(0, a, b, c);
45163 assert_eq_m128(r, _mm_setzero_ps());
45164 let r = _mm_maskz_fmsub_ps(0b00001111, a, b, c);
45165 let e = _mm_set_ps(-1., 0., 1., 2.);
45166 assert_eq_m128(r, e);
45167 }
45168
45169 #[simd_test(enable = "avx512f,avx512vl")]
45170 unsafe fn test_mm_mask3_fmsub_ps() {
45171 let a = _mm_set1_ps(1.);
45172 let b = _mm_set_ps(0., 1., 2., 3.);
45173 let c = _mm_set1_ps(1.);
45174 let r = _mm_mask3_fmsub_ps(a, b, c, 0);
45175 assert_eq_m128(r, c);
45176 let r = _mm_mask3_fmsub_ps(a, b, c, 0b00001111);
45177 let e = _mm_set_ps(-1., 0., 1., 2.);
45178 assert_eq_m128(r, e);
45179 }
45180
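    // `fmaddsub` alternates per lane: even-indexed lanes compute a*b - c and
    // odd-indexed lanes compute a*b + c. A rough scalar sketch (illustrative
    // only):
    //
    //     // lane i: if i % 2 == 0 { a[i] * b[i] - c[i] } else { a[i] * b[i] + c[i] }
    //
    // Keep in mind that `_mm512_setr_ps` lists lane 0 first while
    // `_mm256_set_ps`/`_mm_set_ps` list the highest lane first, so the 256- and
    // 128-bit expected vectors read in the opposite order.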
45181 #[simd_test(enable = "avx512f")]
45182 unsafe fn test_mm512_fmaddsub_ps() {
45183 let a = _mm512_set1_ps(1.);
45184 let b = _mm512_setr_ps(
45185 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45186 );
45187 let c = _mm512_set1_ps(1.);
45188 let r = _mm512_fmaddsub_ps(a, b, c);
45189 let e = _mm512_setr_ps(
45190 -1., 2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16.,
45191 );
45192 assert_eq_m512(r, e);
45193 }
45194
45195 #[simd_test(enable = "avx512f")]
45196 unsafe fn test_mm512_mask_fmaddsub_ps() {
45197 let a = _mm512_set1_ps(1.);
45198 let b = _mm512_setr_ps(
45199 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45200 );
45201 let c = _mm512_set1_ps(1.);
45202 let r = _mm512_mask_fmaddsub_ps(a, 0, b, c);
45203 assert_eq_m512(r, a);
45204 let r = _mm512_mask_fmaddsub_ps(a, 0b00000000_11111111, b, c);
45205 let e = _mm512_setr_ps(
45206 -1., 2., 1., 4., 3., 6., 5., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
45207 );
45208 assert_eq_m512(r, e);
45209 }
45210
45211 #[simd_test(enable = "avx512f")]
45212 unsafe fn test_mm512_maskz_fmaddsub_ps() {
45213 let a = _mm512_set1_ps(1.);
45214 let b = _mm512_setr_ps(
45215 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45216 );
45217 let c = _mm512_set1_ps(1.);
45218 let r = _mm512_maskz_fmaddsub_ps(0, a, b, c);
45219 assert_eq_m512(r, _mm512_setzero_ps());
45220 let r = _mm512_maskz_fmaddsub_ps(0b00000000_11111111, a, b, c);
45221 let e = _mm512_setr_ps(
45222 -1., 2., 1., 4., 3., 6., 5., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
45223 );
45224 assert_eq_m512(r, e);
45225 }
45226
45227 #[simd_test(enable = "avx512f")]
45228 unsafe fn test_mm512_mask3_fmaddsub_ps() {
45229 let a = _mm512_set1_ps(1.);
45230 let b = _mm512_setr_ps(
45231 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45232 );
45233 let c = _mm512_setr_ps(
45234 1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
45235 );
45236 let r = _mm512_mask3_fmaddsub_ps(a, b, c, 0);
45237 assert_eq_m512(r, c);
45238 let r = _mm512_mask3_fmaddsub_ps(a, b, c, 0b00000000_11111111);
45239 let e = _mm512_setr_ps(
45240 -1., 2., 1., 4., 3., 6., 5., 8., 2., 2., 2., 2., 2., 2., 2., 2.,
45241 );
45242 assert_eq_m512(r, e);
45243 }
45244
45245 #[simd_test(enable = "avx512f,avx512vl")]
45246 unsafe fn test_mm256_mask_fmaddsub_ps() {
45247 let a = _mm256_set1_ps(1.);
45248 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45249 let c = _mm256_set1_ps(1.);
45250 let r = _mm256_mask_fmaddsub_ps(a, 0, b, c);
45251 assert_eq_m256(r, a);
45252 let r = _mm256_mask_fmaddsub_ps(a, 0b11111111, b, c);
45253 let e = _mm256_set_ps(1., 0., 3., 2., 5., 4., 7., 6.);
45254 assert_eq_m256(r, e);
45255 }
45256
45257 #[simd_test(enable = "avx512f,avx512vl")]
45258 unsafe fn test_mm256_maskz_fmaddsub_ps() {
45259 let a = _mm256_set1_ps(1.);
45260 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45261 let c = _mm256_set1_ps(1.);
45262 let r = _mm256_maskz_fmaddsub_ps(0, a, b, c);
45263 assert_eq_m256(r, _mm256_setzero_ps());
45264 let r = _mm256_maskz_fmaddsub_ps(0b11111111, a, b, c);
45265 let e = _mm256_set_ps(1., 0., 3., 2., 5., 4., 7., 6.);
45266 assert_eq_m256(r, e);
45267 }
45268
45269 #[simd_test(enable = "avx512f,avx512vl")]
45270 unsafe fn test_mm256_mask3_fmaddsub_ps() {
45271 let a = _mm256_set1_ps(1.);
45272 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45273 let c = _mm256_set1_ps(1.);
45274 let r = _mm256_mask3_fmaddsub_ps(a, b, c, 0);
45275 assert_eq_m256(r, c);
45276 let r = _mm256_mask3_fmaddsub_ps(a, b, c, 0b11111111);
45277 let e = _mm256_set_ps(1., 0., 3., 2., 5., 4., 7., 6.);
45278 assert_eq_m256(r, e);
45279 }
45280
45281 #[simd_test(enable = "avx512f,avx512vl")]
45282 unsafe fn test_mm_mask_fmaddsub_ps() {
45283 let a = _mm_set1_ps(1.);
45284 let b = _mm_set_ps(0., 1., 2., 3.);
45285 let c = _mm_set1_ps(1.);
45286 let r = _mm_mask_fmaddsub_ps(a, 0, b, c);
45287 assert_eq_m128(r, a);
45288 let r = _mm_mask_fmaddsub_ps(a, 0b00001111, b, c);
45289 let e = _mm_set_ps(1., 0., 3., 2.);
45290 assert_eq_m128(r, e);
45291 }
45292
45293 #[simd_test(enable = "avx512f,avx512vl")]
45294 unsafe fn test_mm_maskz_fmaddsub_ps() {
45295 let a = _mm_set1_ps(1.);
45296 let b = _mm_set_ps(0., 1., 2., 3.);
45297 let c = _mm_set1_ps(1.);
45298 let r = _mm_maskz_fmaddsub_ps(0, a, b, c);
45299 assert_eq_m128(r, _mm_setzero_ps());
45300 let r = _mm_maskz_fmaddsub_ps(0b00001111, a, b, c);
45301 let e = _mm_set_ps(1., 0., 3., 2.);
45302 assert_eq_m128(r, e);
45303 }
45304
45305 #[simd_test(enable = "avx512f,avx512vl")]
45306 unsafe fn test_mm_mask3_fmaddsub_ps() {
45307 let a = _mm_set1_ps(1.);
45308 let b = _mm_set_ps(0., 1., 2., 3.);
45309 let c = _mm_set1_ps(1.);
45310 let r = _mm_mask3_fmaddsub_ps(a, b, c, 0);
45311 assert_eq_m128(r, c);
45312 let r = _mm_mask3_fmaddsub_ps(a, b, c, 0b00001111);
45313 let e = _mm_set_ps(1., 0., 3., 2.);
45314 assert_eq_m128(r, e);
45315 }
45316
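    // `fmsubadd` is the mirror image of `fmaddsub`: even-indexed lanes compute
    // a*b + c and odd-indexed lanes compute a*b - c, hence lane 0 of the 512-bit
    // result is 0*1 + 1 = 1 rather than -1.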
45317 #[simd_test(enable = "avx512f")]
45318 unsafe fn test_mm512_fmsubadd_ps() {
45319 let a = _mm512_setr_ps(
45320 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
45321 );
45322 let b = _mm512_setr_ps(
45323 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45324 );
45325 let c = _mm512_setr_ps(
45326 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
45327 );
45328 let r = _mm512_fmsubadd_ps(a, b, c);
45329 let e = _mm512_setr_ps(
45330 1., 0., 3., 2., 5., 4., 7., 6., 9., 8., 11., 10., 13., 12., 15., 14.,
45331 );
45332 assert_eq_m512(r, e);
45333 }
45334
45335 #[simd_test(enable = "avx512f")]
45336 unsafe fn test_mm512_mask_fmsubadd_ps() {
45337 let a = _mm512_set1_ps(1.);
45338 let b = _mm512_setr_ps(
45339 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45340 );
45341 let c = _mm512_set1_ps(1.);
45342 let r = _mm512_mask_fmsubadd_ps(a, 0, b, c);
45343 assert_eq_m512(r, a);
45344 let r = _mm512_mask_fmsubadd_ps(a, 0b00000000_11111111, b, c);
45345 let e = _mm512_setr_ps(
45346 1., 0., 3., 2., 5., 4., 7., 6., 1., 1., 1., 1., 1., 1., 1., 1.,
45347 );
45348 assert_eq_m512(r, e);
45349 }
45350
45351 #[simd_test(enable = "avx512f")]
45352 unsafe fn test_mm512_maskz_fmsubadd_ps() {
45353 let a = _mm512_set1_ps(1.);
45354 let b = _mm512_setr_ps(
45355 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45356 );
45357 let c = _mm512_set1_ps(1.);
45358 let r = _mm512_maskz_fmsubadd_ps(0, a, b, c);
45359 assert_eq_m512(r, _mm512_setzero_ps());
45360 let r = _mm512_maskz_fmsubadd_ps(0b00000000_11111111, a, b, c);
45361 let e = _mm512_setr_ps(
45362 1., 0., 3., 2., 5., 4., 7., 6., 0., 0., 0., 0., 0., 0., 0., 0.,
45363 );
45364 assert_eq_m512(r, e);
45365 }
45366
45367 #[simd_test(enable = "avx512f")]
45368 unsafe fn test_mm512_mask3_fmsubadd_ps() {
45369 let a = _mm512_set1_ps(1.);
45370 let b = _mm512_setr_ps(
45371 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45372 );
45373 let c = _mm512_setr_ps(
45374 1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
45375 );
45376 let r = _mm512_mask3_fmsubadd_ps(a, b, c, 0);
45377 assert_eq_m512(r, c);
45378 let r = _mm512_mask3_fmsubadd_ps(a, b, c, 0b00000000_11111111);
45379 let e = _mm512_setr_ps(
45380 1., 0., 3., 2., 5., 4., 7., 6., 2., 2., 2., 2., 2., 2., 2., 2.,
45381 );
45382 assert_eq_m512(r, e);
45383 }
45384
45385 #[simd_test(enable = "avx512f,avx512vl")]
45386 unsafe fn test_mm256_mask_fmsubadd_ps() {
45387 let a = _mm256_set1_ps(1.);
45388 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45389 let c = _mm256_set1_ps(1.);
45390 let r = _mm256_mask_fmsubadd_ps(a, 0, b, c);
45391 assert_eq_m256(r, a);
45392 let r = _mm256_mask_fmsubadd_ps(a, 0b11111111, b, c);
45393 let e = _mm256_set_ps(-1., 2., 1., 4., 3., 6., 5., 8.);
45394 assert_eq_m256(r, e);
45395 }
45396
45397 #[simd_test(enable = "avx512f,avx512vl")]
45398 unsafe fn test_mm256_maskz_fmsubadd_ps() {
45399 let a = _mm256_set1_ps(1.);
45400 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45401 let c = _mm256_set1_ps(1.);
45402 let r = _mm256_maskz_fmsubadd_ps(0, a, b, c);
45403 assert_eq_m256(r, _mm256_setzero_ps());
45404 let r = _mm256_maskz_fmsubadd_ps(0b11111111, a, b, c);
45405 let e = _mm256_set_ps(-1., 2., 1., 4., 3., 6., 5., 8.);
45406 assert_eq_m256(r, e);
45407 }
45408
45409 #[simd_test(enable = "avx512f,avx512vl")]
45410 unsafe fn test_mm256_mask3_fmsubadd_ps() {
45411 let a = _mm256_set1_ps(1.);
45412 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45413 let c = _mm256_set1_ps(1.);
45414 let r = _mm256_mask3_fmsubadd_ps(a, b, c, 0);
45415 assert_eq_m256(r, c);
45416 let r = _mm256_mask3_fmsubadd_ps(a, b, c, 0b11111111);
45417 let e = _mm256_set_ps(-1., 2., 1., 4., 3., 6., 5., 8.);
45418 assert_eq_m256(r, e);
45419 }
45420
45421 #[simd_test(enable = "avx512f,avx512vl")]
45422 unsafe fn test_mm_mask_fmsubadd_ps() {
45423 let a = _mm_set1_ps(1.);
45424 let b = _mm_set_ps(0., 1., 2., 3.);
45425 let c = _mm_set1_ps(1.);
45426 let r = _mm_mask_fmsubadd_ps(a, 0, b, c);
45427 assert_eq_m128(r, a);
45428 let r = _mm_mask_fmsubadd_ps(a, 0b00001111, b, c);
45429 let e = _mm_set_ps(-1., 2., 1., 4.);
45430 assert_eq_m128(r, e);
45431 }
45432
45433 #[simd_test(enable = "avx512f,avx512vl")]
45434 unsafe fn test_mm_maskz_fmsubadd_ps() {
45435 let a = _mm_set1_ps(1.);
45436 let b = _mm_set_ps(0., 1., 2., 3.);
45437 let c = _mm_set1_ps(1.);
45438 let r = _mm_maskz_fmsubadd_ps(0, a, b, c);
45439 assert_eq_m128(r, _mm_setzero_ps());
45440 let r = _mm_maskz_fmsubadd_ps(0b00001111, a, b, c);
45441 let e = _mm_set_ps(-1., 2., 1., 4.);
45442 assert_eq_m128(r, e);
45443 }
45444
45445 #[simd_test(enable = "avx512f,avx512vl")]
45446 unsafe fn test_mm_mask3_fmsubadd_ps() {
45447 let a = _mm_set1_ps(1.);
45448 let b = _mm_set_ps(0., 1., 2., 3.);
45449 let c = _mm_set1_ps(1.);
45450 let r = _mm_mask3_fmsubadd_ps(a, b, c, 0);
45451 assert_eq_m128(r, c);
45452 let r = _mm_mask3_fmsubadd_ps(a, b, c, 0b00001111);
45453 let e = _mm_set_ps(-1., 2., 1., 4.);
45454 assert_eq_m128(r, e);
45455 }
45456
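    // `fnmadd` computes -(a*b) + c; with a = c = 1 each lane is 1 - b[i], which
    // is where the descending 1, 0, -1, ... expected values come from.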
45457 #[simd_test(enable = "avx512f")]
45458 unsafe fn test_mm512_fnmadd_ps() {
45459 let a = _mm512_set1_ps(1.);
45460 let b = _mm512_setr_ps(
45461 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45462 );
45463 let c = _mm512_set1_ps(1.);
45464 let r = _mm512_fnmadd_ps(a, b, c);
45465 let e = _mm512_setr_ps(
45466 1., 0., -1., -2., -3., -4., -5., -6., -7., -8., -9., -10., -11., -12., -13., -14.,
45467 );
45468 assert_eq_m512(r, e);
45469 }
45470
45471 #[simd_test(enable = "avx512f")]
45472 unsafe fn test_mm512_mask_fnmadd_ps() {
45473 let a = _mm512_set1_ps(1.);
45474 let b = _mm512_setr_ps(
45475 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45476 );
45477 let c = _mm512_set1_ps(1.);
45478 let r = _mm512_mask_fnmadd_ps(a, 0, b, c);
45479 assert_eq_m512(r, a);
45480 let r = _mm512_mask_fnmadd_ps(a, 0b00000000_11111111, b, c);
45481 let e = _mm512_setr_ps(
45482 1., 0., -1., -2., -3., -4., -5., -6., 1., 1., 1., 1., 1., 1., 1., 1.,
45483 );
45484 assert_eq_m512(r, e);
45485 }
45486
45487 #[simd_test(enable = "avx512f")]
45488 unsafe fn test_mm512_maskz_fnmadd_ps() {
45489 let a = _mm512_set1_ps(1.);
45490 let b = _mm512_setr_ps(
45491 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45492 );
45493 let c = _mm512_set1_ps(1.);
45494 let r = _mm512_maskz_fnmadd_ps(0, a, b, c);
45495 assert_eq_m512(r, _mm512_setzero_ps());
45496 let r = _mm512_maskz_fnmadd_ps(0b00000000_11111111, a, b, c);
45497 let e = _mm512_setr_ps(
45498 1., 0., -1., -2., -3., -4., -5., -6., 0., 0., 0., 0., 0., 0., 0., 0.,
45499 );
45500 assert_eq_m512(r, e);
45501 }
45502
45503 #[simd_test(enable = "avx512f")]
45504 unsafe fn test_mm512_mask3_fnmadd_ps() {
45505 let a = _mm512_set1_ps(1.);
45506 let b = _mm512_setr_ps(
45507 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45508 );
45509 let c = _mm512_setr_ps(
45510 1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
45511 );
45512 let r = _mm512_mask3_fnmadd_ps(a, b, c, 0);
45513 assert_eq_m512(r, c);
45514 let r = _mm512_mask3_fnmadd_ps(a, b, c, 0b00000000_11111111);
45515 let e = _mm512_setr_ps(
45516 1., 0., -1., -2., -3., -4., -5., -6., 2., 2., 2., 2., 2., 2., 2., 2.,
45517 );
45518 assert_eq_m512(r, e);
45519 }
45520
45521 #[simd_test(enable = "avx512f,avx512vl")]
45522 unsafe fn test_mm256_mask_fnmadd_ps() {
45523 let a = _mm256_set1_ps(1.);
45524 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45525 let c = _mm256_set1_ps(1.);
45526 let r = _mm256_mask_fnmadd_ps(a, 0, b, c);
45527 assert_eq_m256(r, a);
45528 let r = _mm256_mask_fnmadd_ps(a, 0b11111111, b, c);
45529 let e = _mm256_set_ps(1., 0., -1., -2., -3., -4., -5., -6.);
45530 assert_eq_m256(r, e);
45531 }
45532
45533 #[simd_test(enable = "avx512f,avx512vl")]
45534 unsafe fn test_mm256_maskz_fnmadd_ps() {
45535 let a = _mm256_set1_ps(1.);
45536 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45537 let c = _mm256_set1_ps(1.);
45538 let r = _mm256_maskz_fnmadd_ps(0, a, b, c);
45539 assert_eq_m256(r, _mm256_setzero_ps());
45540 let r = _mm256_maskz_fnmadd_ps(0b11111111, a, b, c);
45541 let e = _mm256_set_ps(1., 0., -1., -2., -3., -4., -5., -6.);
45542 assert_eq_m256(r, e);
45543 }
45544
45545 #[simd_test(enable = "avx512f,avx512vl")]
45546 unsafe fn test_mm256_mask3_fnmadd_ps() {
45547 let a = _mm256_set1_ps(1.);
45548 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45549 let c = _mm256_set1_ps(1.);
45550 let r = _mm256_mask3_fnmadd_ps(a, b, c, 0);
45551 assert_eq_m256(r, c);
45552 let r = _mm256_mask3_fnmadd_ps(a, b, c, 0b11111111);
45553 let e = _mm256_set_ps(1., 0., -1., -2., -3., -4., -5., -6.);
45554 assert_eq_m256(r, e);
45555 }
45556
45557 #[simd_test(enable = "avx512f,avx512vl")]
45558 unsafe fn test_mm_mask_fnmadd_ps() {
45559 let a = _mm_set1_ps(1.);
45560 let b = _mm_set_ps(0., 1., 2., 3.);
45561 let c = _mm_set1_ps(1.);
45562 let r = _mm_mask_fnmadd_ps(a, 0, b, c);
45563 assert_eq_m128(r, a);
45564 let r = _mm_mask_fnmadd_ps(a, 0b00001111, b, c);
45565 let e = _mm_set_ps(1., 0., -1., -2.);
45566 assert_eq_m128(r, e);
45567 }
45568
45569 #[simd_test(enable = "avx512f,avx512vl")]
45570 unsafe fn test_mm_maskz_fnmadd_ps() {
45571 let a = _mm_set1_ps(1.);
45572 let b = _mm_set_ps(0., 1., 2., 3.);
45573 let c = _mm_set1_ps(1.);
45574 let r = _mm_maskz_fnmadd_ps(0, a, b, c);
45575 assert_eq_m128(r, _mm_setzero_ps());
45576 let r = _mm_maskz_fnmadd_ps(0b00001111, a, b, c);
45577 let e = _mm_set_ps(1., 0., -1., -2.);
45578 assert_eq_m128(r, e);
45579 }
45580
45581 #[simd_test(enable = "avx512f,avx512vl")]
45582 unsafe fn test_mm_mask3_fnmadd_ps() {
45583 let a = _mm_set1_ps(1.);
45584 let b = _mm_set_ps(0., 1., 2., 3.);
45585 let c = _mm_set1_ps(1.);
45586 let r = _mm_mask3_fnmadd_ps(a, b, c, 0);
45587 assert_eq_m128(r, c);
45588 let r = _mm_mask3_fnmadd_ps(a, b, c, 0b00001111);
45589 let e = _mm_set_ps(1., 0., -1., -2.);
45590 assert_eq_m128(r, e);
45591 }
45592
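    // `fnmsub` computes -(a*b) - c; with a = c = 1 each lane is -b[i] - 1,
    // giving the -1, -2, ... sequence below.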
45593 #[simd_test(enable = "avx512f")]
45594 unsafe fn test_mm512_fnmsub_ps() {
45595 let a = _mm512_set1_ps(1.);
45596 let b = _mm512_setr_ps(
45597 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45598 );
45599 let c = _mm512_set1_ps(1.);
45600 let r = _mm512_fnmsub_ps(a, b, c);
45601 let e = _mm512_setr_ps(
45602 -1., -2., -3., -4., -5., -6., -7., -8., -9., -10., -11., -12., -13., -14., -15., -16.,
45603 );
45604 assert_eq_m512(r, e);
45605 }
45606
45607 #[simd_test(enable = "avx512f")]
45608 unsafe fn test_mm512_mask_fnmsub_ps() {
45609 let a = _mm512_set1_ps(1.);
45610 let b = _mm512_setr_ps(
45611 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45612 );
45613 let c = _mm512_set1_ps(1.);
45614 let r = _mm512_mask_fnmsub_ps(a, 0, b, c);
45615 assert_eq_m512(r, a);
45616 let r = _mm512_mask_fnmsub_ps(a, 0b00000000_11111111, b, c);
45617 let e = _mm512_setr_ps(
45618 -1., -2., -3., -4., -5., -6., -7., -8., 1., 1., 1., 1., 1., 1., 1., 1.,
45619 );
45620 assert_eq_m512(r, e);
45621 }
45622
45623 #[simd_test(enable = "avx512f")]
45624 unsafe fn test_mm512_maskz_fnmsub_ps() {
45625 let a = _mm512_set1_ps(1.);
45626 let b = _mm512_setr_ps(
45627 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45628 );
45629 let c = _mm512_set1_ps(1.);
45630 let r = _mm512_maskz_fnmsub_ps(0, a, b, c);
45631 assert_eq_m512(r, _mm512_setzero_ps());
45632 let r = _mm512_maskz_fnmsub_ps(0b00000000_11111111, a, b, c);
45633 let e = _mm512_setr_ps(
45634 -1., -2., -3., -4., -5., -6., -7., -8., 0., 0., 0., 0., 0., 0., 0., 0.,
45635 );
45636 assert_eq_m512(r, e);
45637 }
45638
45639 #[simd_test(enable = "avx512f")]
45640 unsafe fn test_mm512_mask3_fnmsub_ps() {
45641 let a = _mm512_set1_ps(1.);
45642 let b = _mm512_setr_ps(
45643 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45644 );
45645 let c = _mm512_setr_ps(
45646 1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
45647 );
45648 let r = _mm512_mask3_fnmsub_ps(a, b, c, 0);
45649 assert_eq_m512(r, c);
45650 let r = _mm512_mask3_fnmsub_ps(a, b, c, 0b00000000_11111111);
45651 let e = _mm512_setr_ps(
45652 -1., -2., -3., -4., -5., -6., -7., -8., 2., 2., 2., 2., 2., 2., 2., 2.,
45653 );
45654 assert_eq_m512(r, e);
45655 }
45656
45657 #[simd_test(enable = "avx512f,avx512vl")]
45658 unsafe fn test_mm256_mask_fnmsub_ps() {
45659 let a = _mm256_set1_ps(1.);
45660 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45661 let c = _mm256_set1_ps(1.);
45662 let r = _mm256_mask_fnmsub_ps(a, 0, b, c);
45663 assert_eq_m256(r, a);
45664 let r = _mm256_mask_fnmsub_ps(a, 0b11111111, b, c);
45665 let e = _mm256_set_ps(-1., -2., -3., -4., -5., -6., -7., -8.);
45666 assert_eq_m256(r, e);
45667 }
45668
45669 #[simd_test(enable = "avx512f,avx512vl")]
45670 unsafe fn test_mm256_maskz_fnmsub_ps() {
45671 let a = _mm256_set1_ps(1.);
45672 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45673 let c = _mm256_set1_ps(1.);
45674 let r = _mm256_maskz_fnmsub_ps(0, a, b, c);
45675 assert_eq_m256(r, _mm256_setzero_ps());
45676 let r = _mm256_maskz_fnmsub_ps(0b11111111, a, b, c);
45677 let e = _mm256_set_ps(-1., -2., -3., -4., -5., -6., -7., -8.);
45678 assert_eq_m256(r, e);
45679 }
45680
45681 #[simd_test(enable = "avx512f,avx512vl")]
45682 unsafe fn test_mm256_mask3_fnmsub_ps() {
45683 let a = _mm256_set1_ps(1.);
45684 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45685 let c = _mm256_set1_ps(1.);
45686 let r = _mm256_mask3_fnmsub_ps(a, b, c, 0);
45687 assert_eq_m256(r, c);
45688 let r = _mm256_mask3_fnmsub_ps(a, b, c, 0b11111111);
45689 let e = _mm256_set_ps(-1., -2., -3., -4., -5., -6., -7., -8.);
45690 assert_eq_m256(r, e);
45691 }
45692
45693 #[simd_test(enable = "avx512f,avx512vl")]
45694 unsafe fn test_mm_mask_fnmsub_ps() {
45695 let a = _mm_set1_ps(1.);
45696 let b = _mm_set_ps(0., 1., 2., 3.);
45697 let c = _mm_set1_ps(1.);
45698 let r = _mm_mask_fnmsub_ps(a, 0, b, c);
45699 assert_eq_m128(r, a);
45700 let r = _mm_mask_fnmsub_ps(a, 0b00001111, b, c);
45701 let e = _mm_set_ps(-1., -2., -3., -4.);
45702 assert_eq_m128(r, e);
45703 }
45704
45705 #[simd_test(enable = "avx512f,avx512vl")]
45706 unsafe fn test_mm_maskz_fnmsub_ps() {
45707 let a = _mm_set1_ps(1.);
45708 let b = _mm_set_ps(0., 1., 2., 3.);
45709 let c = _mm_set1_ps(1.);
45710 let r = _mm_maskz_fnmsub_ps(0, a, b, c);
45711 assert_eq_m128(r, _mm_setzero_ps());
45712 let r = _mm_maskz_fnmsub_ps(0b00001111, a, b, c);
45713 let e = _mm_set_ps(-1., -2., -3., -4.);
45714 assert_eq_m128(r, e);
45715 }
45716
45717 #[simd_test(enable = "avx512f,avx512vl")]
45718 unsafe fn test_mm_mask3_fnmsub_ps() {
45719 let a = _mm_set1_ps(1.);
45720 let b = _mm_set_ps(0., 1., 2., 3.);
45721 let c = _mm_set1_ps(1.);
45722 let r = _mm_mask3_fnmsub_ps(a, b, c, 0);
45723 assert_eq_m128(r, c);
45724 let r = _mm_mask3_fnmsub_ps(a, b, c, 0b00001111);
45725 let e = _mm_set_ps(-1., -2., -3., -4.);
45726 assert_eq_m128(r, e);
45727 }
45728
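    // `rcp14` is an approximate reciprocal with a relative error of at most
    // 2^-14, so 1/3 comes back as the approximation 0.33333206 rather than the
    // correctly rounded 0.33333334; the tests compare against that exact
    // approximation.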
45729 #[simd_test(enable = "avx512f")]
45730 unsafe fn test_mm512_rcp14_ps() {
45731 let a = _mm512_set1_ps(3.);
45732 let r = _mm512_rcp14_ps(a);
45733 let e = _mm512_set1_ps(0.33333206);
45734 assert_eq_m512(r, e);
45735 }
45736
45737 #[simd_test(enable = "avx512f")]
45738 unsafe fn test_mm512_mask_rcp14_ps() {
45739 let a = _mm512_set1_ps(3.);
45740 let r = _mm512_mask_rcp14_ps(a, 0, a);
45741 assert_eq_m512(r, a);
45742 let r = _mm512_mask_rcp14_ps(a, 0b11111111_00000000, a);
45743 let e = _mm512_setr_ps(
45744 3., 3., 3., 3., 3., 3., 3., 3., 0.33333206, 0.33333206, 0.33333206, 0.33333206,
45745 0.33333206, 0.33333206, 0.33333206, 0.33333206,
45746 );
45747 assert_eq_m512(r, e);
45748 }
45749
45750 #[simd_test(enable = "avx512f")]
45751 unsafe fn test_mm512_maskz_rcp14_ps() {
45752 let a = _mm512_set1_ps(3.);
45753 let r = _mm512_maskz_rcp14_ps(0, a);
45754 assert_eq_m512(r, _mm512_setzero_ps());
45755 let r = _mm512_maskz_rcp14_ps(0b11111111_00000000, a);
45756 let e = _mm512_setr_ps(
45757 0., 0., 0., 0., 0., 0., 0., 0., 0.33333206, 0.33333206, 0.33333206, 0.33333206,
45758 0.33333206, 0.33333206, 0.33333206, 0.33333206,
45759 );
45760 assert_eq_m512(r, e);
45761 }
45762
45763 #[simd_test(enable = "avx512f,avx512vl")]
45764 unsafe fn test_mm256_rcp14_ps() {
45765 let a = _mm256_set1_ps(3.);
45766 let r = _mm256_rcp14_ps(a);
45767 let e = _mm256_set1_ps(0.33333206);
45768 assert_eq_m256(r, e);
45769 }
45770
45771 #[simd_test(enable = "avx512f,avx512vl")]
45772 unsafe fn test_mm256_mask_rcp14_ps() {
45773 let a = _mm256_set1_ps(3.);
45774 let r = _mm256_mask_rcp14_ps(a, 0, a);
45775 assert_eq_m256(r, a);
45776 let r = _mm256_mask_rcp14_ps(a, 0b11111111, a);
45777 let e = _mm256_set1_ps(0.33333206);
45778 assert_eq_m256(r, e);
45779 }
45780
45781 #[simd_test(enable = "avx512f,avx512vl")]
45782 unsafe fn test_mm256_maskz_rcp14_ps() {
45783 let a = _mm256_set1_ps(3.);
45784 let r = _mm256_maskz_rcp14_ps(0, a);
45785 assert_eq_m256(r, _mm256_setzero_ps());
45786 let r = _mm256_maskz_rcp14_ps(0b11111111, a);
45787 let e = _mm256_set1_ps(0.33333206);
45788 assert_eq_m256(r, e);
45789 }
45790
45791 #[simd_test(enable = "avx512f,avx512vl")]
45792 unsafe fn test_mm_rcp14_ps() {
45793 let a = _mm_set1_ps(3.);
45794 let r = _mm_rcp14_ps(a);
45795 let e = _mm_set1_ps(0.33333206);
45796 assert_eq_m128(r, e);
45797 }
45798
45799 #[simd_test(enable = "avx512f,avx512vl")]
45800 unsafe fn test_mm_mask_rcp14_ps() {
45801 let a = _mm_set1_ps(3.);
45802 let r = _mm_mask_rcp14_ps(a, 0, a);
45803 assert_eq_m128(r, a);
45804 let r = _mm_mask_rcp14_ps(a, 0b00001111, a);
45805 let e = _mm_set1_ps(0.33333206);
45806 assert_eq_m128(r, e);
45807 }
45808
45809 #[simd_test(enable = "avx512f,avx512vl")]
45810 unsafe fn test_mm_maskz_rcp14_ps() {
45811 let a = _mm_set1_ps(3.);
45812 let r = _mm_maskz_rcp14_ps(0, a);
45813 assert_eq_m128(r, _mm_setzero_ps());
45814 let r = _mm_maskz_rcp14_ps(0b00001111, a);
45815 let e = _mm_set1_ps(0.33333206);
45816 assert_eq_m128(r, e);
45817 }
45818
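    // `rsqrt14` approximates 1/sqrt(x) with a relative error of at most 2^-14;
    // 1/sqrt(3) ≈ 0.57735027, and 0.5773392 is the approximation these tests
    // expect.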
45819 #[simd_test(enable = "avx512f")]
45820 unsafe fn test_mm512_rsqrt14_ps() {
45821 let a = _mm512_set1_ps(3.);
45822 let r = _mm512_rsqrt14_ps(a);
45823 let e = _mm512_set1_ps(0.5773392);
45824 assert_eq_m512(r, e);
45825 }
45826
45827 #[simd_test(enable = "avx512f")]
45828 unsafe fn test_mm512_mask_rsqrt14_ps() {
45829 let a = _mm512_set1_ps(3.);
45830 let r = _mm512_mask_rsqrt14_ps(a, 0, a);
45831 assert_eq_m512(r, a);
45832 let r = _mm512_mask_rsqrt14_ps(a, 0b11111111_00000000, a);
45833 let e = _mm512_setr_ps(
45834 3., 3., 3., 3., 3., 3., 3., 3., 0.5773392, 0.5773392, 0.5773392, 0.5773392, 0.5773392,
45835 0.5773392, 0.5773392, 0.5773392,
45836 );
45837 assert_eq_m512(r, e);
45838 }
45839
45840 #[simd_test(enable = "avx512f")]
45841 unsafe fn test_mm512_maskz_rsqrt14_ps() {
45842 let a = _mm512_set1_ps(3.);
45843 let r = _mm512_maskz_rsqrt14_ps(0, a);
45844 assert_eq_m512(r, _mm512_setzero_ps());
45845 let r = _mm512_maskz_rsqrt14_ps(0b11111111_00000000, a);
45846 let e = _mm512_setr_ps(
45847 0., 0., 0., 0., 0., 0., 0., 0., 0.5773392, 0.5773392, 0.5773392, 0.5773392, 0.5773392,
45848 0.5773392, 0.5773392, 0.5773392,
45849 );
45850 assert_eq_m512(r, e);
45851 }
45852
45853 #[simd_test(enable = "avx512f,avx512vl")]
45854 unsafe fn test_mm256_rsqrt14_ps() {
45855 let a = _mm256_set1_ps(3.);
45856 let r = _mm256_rsqrt14_ps(a);
45857 let e = _mm256_set1_ps(0.5773392);
45858 assert_eq_m256(r, e);
45859 }
45860
45861 #[simd_test(enable = "avx512f,avx512vl")]
45862 unsafe fn test_mm256_mask_rsqrt14_ps() {
45863 let a = _mm256_set1_ps(3.);
45864 let r = _mm256_mask_rsqrt14_ps(a, 0, a);
45865 assert_eq_m256(r, a);
45866 let r = _mm256_mask_rsqrt14_ps(a, 0b11111111, a);
45867 let e = _mm256_set1_ps(0.5773392);
45868 assert_eq_m256(r, e);
45869 }
45870
45871 #[simd_test(enable = "avx512f,avx512vl")]
45872 unsafe fn test_mm256_maskz_rsqrt14_ps() {
45873 let a = _mm256_set1_ps(3.);
45874 let r = _mm256_maskz_rsqrt14_ps(0, a);
45875 assert_eq_m256(r, _mm256_setzero_ps());
45876 let r = _mm256_maskz_rsqrt14_ps(0b11111111, a);
45877 let e = _mm256_set1_ps(0.5773392);
45878 assert_eq_m256(r, e);
45879 }
45880
45881 #[simd_test(enable = "avx512f,avx512vl")]
45882 unsafe fn test_mm_rsqrt14_ps() {
45883 let a = _mm_set1_ps(3.);
45884 let r = _mm_rsqrt14_ps(a);
45885 let e = _mm_set1_ps(0.5773392);
45886 assert_eq_m128(r, e);
45887 }
45888
45889 #[simd_test(enable = "avx512f,avx512vl")]
45890 unsafe fn test_mm_mask_rsqrt14_ps() {
45891 let a = _mm_set1_ps(3.);
45892 let r = _mm_mask_rsqrt14_ps(a, 0, a);
45893 assert_eq_m128(r, a);
45894 let r = _mm_mask_rsqrt14_ps(a, 0b00001111, a);
45895 let e = _mm_set1_ps(0.5773392);
45896 assert_eq_m128(r, e);
45897 }
45898
45899 #[simd_test(enable = "avx512f,avx512vl")]
45900 unsafe fn test_mm_maskz_rsqrt14_ps() {
45901 let a = _mm_set1_ps(3.);
45902 let r = _mm_maskz_rsqrt14_ps(0, a);
45903 assert_eq_m128(r, _mm_setzero_ps());
45904 let r = _mm_maskz_rsqrt14_ps(0b00001111, a);
45905 let e = _mm_set1_ps(0.5773392);
45906 assert_eq_m128(r, e);
45907 }
45908
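    // `getexp` returns the unbiased exponent of each lane as a float, i.e.
    // floor(log2(|x|)); for 3.0 = 1.5 * 2^1 that is 1.0.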
45909 #[simd_test(enable = "avx512f")]
45910 unsafe fn test_mm512_getexp_ps() {
45911 let a = _mm512_set1_ps(3.);
45912 let r = _mm512_getexp_ps(a);
45913 let e = _mm512_set1_ps(1.);
45914 assert_eq_m512(r, e);
45915 }
45916
45917 #[simd_test(enable = "avx512f")]
45918 unsafe fn test_mm512_mask_getexp_ps() {
45919 let a = _mm512_set1_ps(3.);
45920 let r = _mm512_mask_getexp_ps(a, 0, a);
45921 assert_eq_m512(r, a);
45922 let r = _mm512_mask_getexp_ps(a, 0b11111111_00000000, a);
45923 let e = _mm512_setr_ps(
45924 3., 3., 3., 3., 3., 3., 3., 3., 1., 1., 1., 1., 1., 1., 1., 1.,
45925 );
45926 assert_eq_m512(r, e);
45927 }
45928
45929 #[simd_test(enable = "avx512f")]
45930 unsafe fn test_mm512_maskz_getexp_ps() {
45931 let a = _mm512_set1_ps(3.);
45932 let r = _mm512_maskz_getexp_ps(0, a);
45933 assert_eq_m512(r, _mm512_setzero_ps());
45934 let r = _mm512_maskz_getexp_ps(0b11111111_00000000, a);
45935 let e = _mm512_setr_ps(
45936 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
45937 );
45938 assert_eq_m512(r, e);
45939 }
45940
45941 #[simd_test(enable = "avx512f,avx512vl")]
45942 unsafe fn test_mm256_getexp_ps() {
45943 let a = _mm256_set1_ps(3.);
45944 let r = _mm256_getexp_ps(a);
45945 let e = _mm256_set1_ps(1.);
45946 assert_eq_m256(r, e);
45947 }
45948
45949 #[simd_test(enable = "avx512f,avx512vl")]
45950 unsafe fn test_mm256_mask_getexp_ps() {
45951 let a = _mm256_set1_ps(3.);
45952 let r = _mm256_mask_getexp_ps(a, 0, a);
45953 assert_eq_m256(r, a);
45954 let r = _mm256_mask_getexp_ps(a, 0b11111111, a);
45955 let e = _mm256_set1_ps(1.);
45956 assert_eq_m256(r, e);
45957 }
45958
45959 #[simd_test(enable = "avx512f,avx512vl")]
45960 unsafe fn test_mm256_maskz_getexp_ps() {
45961 let a = _mm256_set1_ps(3.);
45962 let r = _mm256_maskz_getexp_ps(0, a);
45963 assert_eq_m256(r, _mm256_setzero_ps());
45964 let r = _mm256_maskz_getexp_ps(0b11111111, a);
45965 let e = _mm256_set1_ps(1.);
45966 assert_eq_m256(r, e);
45967 }
45968
45969 #[simd_test(enable = "avx512f,avx512vl")]
45970 unsafe fn test_mm_getexp_ps() {
45971 let a = _mm_set1_ps(3.);
45972 let r = _mm_getexp_ps(a);
45973 let e = _mm_set1_ps(1.);
45974 assert_eq_m128(r, e);
45975 }
45976
45977 #[simd_test(enable = "avx512f,avx512vl")]
45978 unsafe fn test_mm_mask_getexp_ps() {
45979 let a = _mm_set1_ps(3.);
45980 let r = _mm_mask_getexp_ps(a, 0, a);
45981 assert_eq_m128(r, a);
45982 let r = _mm_mask_getexp_ps(a, 0b00001111, a);
45983 let e = _mm_set1_ps(1.);
45984 assert_eq_m128(r, e);
45985 }
45986
45987 #[simd_test(enable = "avx512f,avx512vl")]
45988 unsafe fn test_mm_maskz_getexp_ps() {
45989 let a = _mm_set1_ps(3.);
45990 let r = _mm_maskz_getexp_ps(0, a);
45991 assert_eq_m128(r, _mm_setzero_ps());
45992 let r = _mm_maskz_getexp_ps(0b00001111, a);
45993 let e = _mm_set1_ps(1.);
45994 assert_eq_m128(r, e);
45995 }
45996
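    // For `roundscale`, imm8[7:4] gives the number of fraction bits to keep
    // (rounding to a multiple of 2^-M) and the low bits select the rounding
    // mode; `0b00_00_00_00` therefore rounds to the nearest integer, turning
    // 1.1 into 1.0.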
45997 #[simd_test(enable = "avx512f")]
45998 unsafe fn test_mm512_roundscale_ps() {
45999 let a = _mm512_set1_ps(1.1);
46000 let r = _mm512_roundscale_ps::<0b00_00_00_00>(a);
46001 let e = _mm512_set1_ps(1.0);
46002 assert_eq_m512(r, e);
46003 }
46004
46005 #[simd_test(enable = "avx512f")]
46006 unsafe fn test_mm512_mask_roundscale_ps() {
46007 let a = _mm512_set1_ps(1.1);
46008 let r = _mm512_mask_roundscale_ps::<0b00_00_00_00>(a, 0, a);
46009 let e = _mm512_set1_ps(1.1);
46010 assert_eq_m512(r, e);
46011 let r = _mm512_mask_roundscale_ps::<0b00_00_00_00>(a, 0b11111111_11111111, a);
46012 let e = _mm512_set1_ps(1.0);
46013 assert_eq_m512(r, e);
46014 }
46015
46016 #[simd_test(enable = "avx512f")]
46017 unsafe fn test_mm512_maskz_roundscale_ps() {
46018 let a = _mm512_set1_ps(1.1);
46019 let r = _mm512_maskz_roundscale_ps::<0b00_00_00_00>(0, a);
46020 assert_eq_m512(r, _mm512_setzero_ps());
46021 let r = _mm512_maskz_roundscale_ps::<0b00_00_00_00>(0b11111111_11111111, a);
46022 let e = _mm512_set1_ps(1.0);
46023 assert_eq_m512(r, e);
46024 }
46025
46026 #[simd_test(enable = "avx512f,avx512vl")]
46027 unsafe fn test_mm256_roundscale_ps() {
46028 let a = _mm256_set1_ps(1.1);
46029 let r = _mm256_roundscale_ps::<0b00_00_00_00>(a);
46030 let e = _mm256_set1_ps(1.0);
46031 assert_eq_m256(r, e);
46032 }
46033
46034 #[simd_test(enable = "avx512f,avx512vl")]
46035 unsafe fn test_mm256_mask_roundscale_ps() {
46036 let a = _mm256_set1_ps(1.1);
46037 let r = _mm256_mask_roundscale_ps::<0b00_00_00_00>(a, 0, a);
46038 let e = _mm256_set1_ps(1.1);
46039 assert_eq_m256(r, e);
46040 let r = _mm256_mask_roundscale_ps::<0b00_00_00_00>(a, 0b11111111, a);
46041 let e = _mm256_set1_ps(1.0);
46042 assert_eq_m256(r, e);
46043 }
46044
46045 #[simd_test(enable = "avx512f,avx512vl")]
46046 unsafe fn test_mm256_maskz_roundscale_ps() {
46047 let a = _mm256_set1_ps(1.1);
46048 let r = _mm256_maskz_roundscale_ps::<0b00_00_00_00>(0, a);
46049 assert_eq_m256(r, _mm256_setzero_ps());
46050 let r = _mm256_maskz_roundscale_ps::<0b00_00_00_00>(0b11111111, a);
46051 let e = _mm256_set1_ps(1.0);
46052 assert_eq_m256(r, e);
46053 }
46054
46055 #[simd_test(enable = "avx512f,avx512vl")]
46056 unsafe fn test_mm_roundscale_ps() {
46057 let a = _mm_set1_ps(1.1);
46058 let r = _mm_roundscale_ps::<0b00_00_00_00>(a);
46059 let e = _mm_set1_ps(1.0);
46060 assert_eq_m128(r, e);
46061 }
46062
46063 #[simd_test(enable = "avx512f,avx512vl")]
46064 unsafe fn test_mm_mask_roundscale_ps() {
46065 let a = _mm_set1_ps(1.1);
46066 let r = _mm_mask_roundscale_ps::<0b00_00_00_00>(a, 0, a);
46067 let e = _mm_set1_ps(1.1);
46068 assert_eq_m128(r, e);
46069 let r = _mm_mask_roundscale_ps::<0b00_00_00_00>(a, 0b00001111, a);
46070 let e = _mm_set1_ps(1.0);
46071 assert_eq_m128(r, e);
46072 }
46073
46074 #[simd_test(enable = "avx512f,avx512vl")]
46075 unsafe fn test_mm_maskz_roundscale_ps() {
46076 let a = _mm_set1_ps(1.1);
46077 let r = _mm_maskz_roundscale_ps::<0b00_00_00_00>(0, a);
46078 assert_eq_m128(r, _mm_setzero_ps());
46079 let r = _mm_maskz_roundscale_ps::<0b00_00_00_00>(0b00001111, a);
46080 let e = _mm_set1_ps(1.0);
46081 assert_eq_m128(r, e);
46082 }
46083
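    // `scalef` computes a * 2^floor(b) per lane, so with a = 1 and b = 3 every
    // lane is 8.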
46084 #[simd_test(enable = "avx512f")]
46085 unsafe fn test_mm512_scalef_ps() {
46086 let a = _mm512_set1_ps(1.);
46087 let b = _mm512_set1_ps(3.);
46088 let r = _mm512_scalef_ps(a, b);
46089 let e = _mm512_set1_ps(8.);
46090 assert_eq_m512(r, e);
46091 }
46092
46093 #[simd_test(enable = "avx512f")]
46094 unsafe fn test_mm512_mask_scalef_ps() {
46095 let a = _mm512_set1_ps(1.);
46096 let b = _mm512_set1_ps(3.);
46097 let r = _mm512_mask_scalef_ps(a, 0, a, b);
46098 assert_eq_m512(r, a);
46099 let r = _mm512_mask_scalef_ps(a, 0b11111111_00000000, a, b);
46100 let e = _mm512_set_ps(
46101 8., 8., 8., 8., 8., 8., 8., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
46102 );
46103 assert_eq_m512(r, e);
46104 }
46105
46106 #[simd_test(enable = "avx512f")]
46107 unsafe fn test_mm512_maskz_scalef_ps() {
46108 let a = _mm512_set1_ps(1.);
46109 let b = _mm512_set1_ps(3.);
46110 let r = _mm512_maskz_scalef_ps(0, a, b);
46111 assert_eq_m512(r, _mm512_setzero_ps());
46112 let r = _mm512_maskz_scalef_ps(0b11111111_00000000, a, b);
46113 let e = _mm512_set_ps(
46114 8., 8., 8., 8., 8., 8., 8., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
46115 );
46116 assert_eq_m512(r, e);
46117 }
46118
46119 #[simd_test(enable = "avx512f,avx512vl")]
46120 unsafe fn test_mm256_scalef_ps() {
46121 let a = _mm256_set1_ps(1.);
46122 let b = _mm256_set1_ps(3.);
46123 let r = _mm256_scalef_ps(a, b);
46124 let e = _mm256_set1_ps(8.);
46125 assert_eq_m256(r, e);
46126 }
46127
46128 #[simd_test(enable = "avx512f,avx512vl")]
46129 unsafe fn test_mm256_mask_scalef_ps() {
46130 let a = _mm256_set1_ps(1.);
46131 let b = _mm256_set1_ps(3.);
46132 let r = _mm256_mask_scalef_ps(a, 0, a, b);
46133 assert_eq_m256(r, a);
46134 let r = _mm256_mask_scalef_ps(a, 0b11111111, a, b);
46135 let e = _mm256_set1_ps(8.);
46136 assert_eq_m256(r, e);
46137 }
46138
46139 #[simd_test(enable = "avx512f,avx512vl")]
46140 unsafe fn test_mm256_maskz_scalef_ps() {
46141 let a = _mm256_set1_ps(1.);
46142 let b = _mm256_set1_ps(3.);
46143 let r = _mm256_maskz_scalef_ps(0, a, b);
46144 assert_eq_m256(r, _mm256_setzero_ps());
46145 let r = _mm256_maskz_scalef_ps(0b11111111, a, b);
46146 let e = _mm256_set1_ps(8.);
46147 assert_eq_m256(r, e);
46148 }
46149
46150 #[simd_test(enable = "avx512f,avx512vl")]
46151 unsafe fn test_mm_scalef_ps() {
46152 let a = _mm_set1_ps(1.);
46153 let b = _mm_set1_ps(3.);
46154 let r = _mm_scalef_ps(a, b);
46155 let e = _mm_set1_ps(8.);
46156 assert_eq_m128(r, e);
46157 }
46158
46159 #[simd_test(enable = "avx512f,avx512vl")]
46160 unsafe fn test_mm_mask_scalef_ps() {
46161 let a = _mm_set1_ps(1.);
46162 let b = _mm_set1_ps(3.);
46163 let r = _mm_mask_scalef_ps(a, 0, a, b);
46164 assert_eq_m128(r, a);
46165 let r = _mm_mask_scalef_ps(a, 0b00001111, a, b);
46166 let e = _mm_set1_ps(8.);
46167 assert_eq_m128(r, e);
46168 }
46169
46170 #[simd_test(enable = "avx512f,avx512vl")]
46171 unsafe fn test_mm_maskz_scalef_ps() {
46172 let a = _mm_set1_ps(1.);
46173 let b = _mm_set1_ps(3.);
46174 let r = _mm_maskz_scalef_ps(0, a, b);
46175 assert_eq_m128(r, _mm_setzero_ps());
46176 let r = _mm_maskz_scalef_ps(0b00001111, a, b);
46177 let e = _mm_set1_ps(8.);
46178 assert_eq_m128(r, e);
46179 }
46180
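    // `fixupimm` classifies each input lane and uses the per-lane 32-bit table
    // in `c` (eight 4-bit response codes, further qualified by the imm8) to
    // decide how special values are replaced. With these particular inputs and
    // imm8 = 5, the NaN lanes are fixed up to +0.0, which is what the expected
    // vectors below encode.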
46181 #[simd_test(enable = "avx512f")]
46182 unsafe fn test_mm512_fixupimm_ps() {
46183 let a = _mm512_set1_ps(f32::NAN);
46184 let b = _mm512_set1_ps(f32::MAX);
46185 let c = _mm512_set1_epi32(i32::MAX);
46187 let r = _mm512_fixupimm_ps::<5>(a, b, c);
46188 let e = _mm512_set1_ps(0.0);
46189 assert_eq_m512(r, e);
46190 }
46191
46192 #[simd_test(enable = "avx512f")]
46193 unsafe fn test_mm512_mask_fixupimm_ps() {
46194 #[rustfmt::skip]
46195 let a = _mm512_set_ps(
46196 f32::NAN, f32::NAN, f32::NAN, f32::NAN,
46197 f32::NAN, f32::NAN, f32::NAN, f32::NAN,
46198 1., 1., 1., 1.,
46199 1., 1., 1., 1.,
46200 );
46201 let b = _mm512_set1_ps(f32::MAX);
46202 let c = _mm512_set1_epi32(i32::MAX);
46203 let r = _mm512_mask_fixupimm_ps::<5>(a, 0b11111111_00000000, b, c);
46204 let e = _mm512_set_ps(
46205 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
46206 );
46207 assert_eq_m512(r, e);
46208 }
46209
46210 #[simd_test(enable = "avx512f")]
46211 unsafe fn test_mm512_maskz_fixupimm_ps() {
46212 #[rustfmt::skip]
46213 let a = _mm512_set_ps(
46214 f32::NAN, f32::NAN, f32::NAN, f32::NAN,
46215 f32::NAN, f32::NAN, f32::NAN, f32::NAN,
46216 1., 1., 1., 1.,
46217 1., 1., 1., 1.,
46218 );
46219 let b = _mm512_set1_ps(f32::MAX);
46220 let c = _mm512_set1_epi32(i32::MAX);
46221 let r = _mm512_maskz_fixupimm_ps::<5>(0b11111111_00000000, a, b, c);
46222 let e = _mm512_set_ps(
46223 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
46224 );
46225 assert_eq_m512(r, e);
46226 }
46227
46228 #[simd_test(enable = "avx512f,avx512vl")]
46229 unsafe fn test_mm256_fixupimm_ps() {
46230 let a = _mm256_set1_ps(f32::NAN);
46231 let b = _mm256_set1_ps(f32::MAX);
46232 let c = _mm256_set1_epi32(i32::MAX);
46233 let r = _mm256_fixupimm_ps::<5>(a, b, c);
46234 let e = _mm256_set1_ps(0.0);
46235 assert_eq_m256(r, e);
46236 }
46237
46238 #[simd_test(enable = "avx512f,avx512vl")]
46239 unsafe fn test_mm256_mask_fixupimm_ps() {
46240 let a = _mm256_set1_ps(f32::NAN);
46241 let b = _mm256_set1_ps(f32::MAX);
46242 let c = _mm256_set1_epi32(i32::MAX);
46243 let r = _mm256_mask_fixupimm_ps::<5>(a, 0b11111111, b, c);
46244 let e = _mm256_set1_ps(0.0);
46245 assert_eq_m256(r, e);
46246 }
46247
46248 #[simd_test(enable = "avx512f,avx512vl")]
46249 unsafe fn test_mm256_maskz_fixupimm_ps() {
46250 let a = _mm256_set1_ps(f32::NAN);
46251 let b = _mm256_set1_ps(f32::MAX);
46252 let c = _mm256_set1_epi32(i32::MAX);
46253 let r = _mm256_maskz_fixupimm_ps::<5>(0b11111111, a, b, c);
46254 let e = _mm256_set1_ps(0.0);
46255 assert_eq_m256(r, e);
46256 }
46257
46258 #[simd_test(enable = "avx512f,avx512vl")]
46259 unsafe fn test_mm_fixupimm_ps() {
46260 let a = _mm_set1_ps(f32::NAN);
46261 let b = _mm_set1_ps(f32::MAX);
46262 let c = _mm_set1_epi32(i32::MAX);
46263 let r = _mm_fixupimm_ps::<5>(a, b, c);
46264 let e = _mm_set1_ps(0.0);
46265 assert_eq_m128(r, e);
46266 }
46267
46268 #[simd_test(enable = "avx512f,avx512vl")]
46269 unsafe fn test_mm_mask_fixupimm_ps() {
46270 let a = _mm_set1_ps(f32::NAN);
46271 let b = _mm_set1_ps(f32::MAX);
46272 let c = _mm_set1_epi32(i32::MAX);
46273 let r = _mm_mask_fixupimm_ps::<5>(a, 0b00001111, b, c);
46274 let e = _mm_set1_ps(0.0);
46275 assert_eq_m128(r, e);
46276 }
46277
46278 #[simd_test(enable = "avx512f,avx512vl")]
46279 unsafe fn test_mm_maskz_fixupimm_ps() {
46280 let a = _mm_set1_ps(f32::NAN);
46281 let b = _mm_set1_ps(f32::MAX);
46282 let c = _mm_set1_epi32(i32::MAX);
46283 let r = _mm_maskz_fixupimm_ps::<5>(0b00001111, a, b, c);
46284 let e = _mm_set1_ps(0.0);
46285 assert_eq_m128(r, e);
46286 }
46287
46288 #[simd_test(enable = "avx512f")]
46289 unsafe fn test_mm512_ternarylogic_epi32() {
46290 let a = _mm512_set1_epi32(1 << 2);
46291 let b = _mm512_set1_epi32(1 << 1);
46292 let c = _mm512_set1_epi32(1 << 0);
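// IMM8 = 8 sets only truth-table entry 0b011 (a=0, b=1, c=1); no bit position of a, b, c has that pattern, so every lane is 0.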
46293 let r = _mm512_ternarylogic_epi32::<8>(a, b, c);
46294 let e = _mm512_set1_epi32(0);
46295 assert_eq_m512i(r, e);
46296 }
46297
46298 #[simd_test(enable = "avx512f")]
46299 unsafe fn test_mm512_mask_ternarylogic_epi32() {
46300 let src = _mm512_set1_epi32(1 << 2);
46301 let a = _mm512_set1_epi32(1 << 1);
46302 let b = _mm512_set1_epi32(1 << 0);
46303 let r = _mm512_mask_ternarylogic_epi32::<8>(src, 0, a, b);
46304 assert_eq_m512i(r, src);
46305 let r = _mm512_mask_ternarylogic_epi32::<8>(src, 0b11111111_11111111, a, b);
46306 let e = _mm512_set1_epi32(0);
46307 assert_eq_m512i(r, e);
46308 }
46309
46310 #[simd_test(enable = "avx512f")]
46311 unsafe fn test_mm512_maskz_ternarylogic_epi32() {
46312 let a = _mm512_set1_epi32(1 << 2);
46313 let b = _mm512_set1_epi32(1 << 1);
46314 let c = _mm512_set1_epi32(1 << 0);
46315 let r = _mm512_maskz_ternarylogic_epi32::<9>(0, a, b, c);
46316 assert_eq_m512i(r, _mm512_setzero_si512());
46317 let r = _mm512_maskz_ternarylogic_epi32::<8>(0b11111111_11111111, a, b, c);
46318 let e = _mm512_set1_epi32(0);
46319 assert_eq_m512i(r, e);
46320 }
46321
46322 #[simd_test(enable = "avx512f,avx512vl")]
46323 unsafe fn test_mm256_ternarylogic_epi32() {
46324 let a = _mm256_set1_epi32(1 << 2);
46325 let b = _mm256_set1_epi32(1 << 1);
46326 let c = _mm256_set1_epi32(1 << 0);
46327 let r = _mm256_ternarylogic_epi32::<8>(a, b, c);
46328 let e = _mm256_set1_epi32(0);
46329 assert_eq_m256i(r, e);
46330 }
46331
46332 #[simd_test(enable = "avx512f,avx512vl")]
46333 unsafe fn test_mm256_mask_ternarylogic_epi32() {
46334 let src = _mm256_set1_epi32(1 << 2);
46335 let a = _mm256_set1_epi32(1 << 1);
46336 let b = _mm256_set1_epi32(1 << 0);
46337 let r = _mm256_mask_ternarylogic_epi32::<8>(src, 0, a, b);
46338 assert_eq_m256i(r, src);
46339 let r = _mm256_mask_ternarylogic_epi32::<8>(src, 0b11111111, a, b);
46340 let e = _mm256_set1_epi32(0);
46341 assert_eq_m256i(r, e);
46342 }
46343
46344 #[simd_test(enable = "avx512f,avx512vl")]
46345 unsafe fn test_mm256_maskz_ternarylogic_epi32() {
46346 let a = _mm256_set1_epi32(1 << 2);
46347 let b = _mm256_set1_epi32(1 << 1);
46348 let c = _mm256_set1_epi32(1 << 0);
46349 let r = _mm256_maskz_ternarylogic_epi32::<9>(0, a, b, c);
46350 assert_eq_m256i(r, _mm256_setzero_si256());
46351 let r = _mm256_maskz_ternarylogic_epi32::<8>(0b11111111, a, b, c);
46352 let e = _mm256_set1_epi32(0);
46353 assert_eq_m256i(r, e);
46354 }
46355
46356 #[simd_test(enable = "avx512f,avx512vl")]
46357 unsafe fn test_mm_ternarylogic_epi32() {
46358 let a = _mm_set1_epi32(1 << 2);
46359 let b = _mm_set1_epi32(1 << 1);
46360 let c = _mm_set1_epi32(1 << 0);
46361 let r = _mm_ternarylogic_epi32::<8>(a, b, c);
46362 let e = _mm_set1_epi32(0);
46363 assert_eq_m128i(r, e);
46364 }
46365
46366 #[simd_test(enable = "avx512f,avx512vl")]
46367 unsafe fn test_mm_mask_ternarylogic_epi32() {
46368 let src = _mm_set1_epi32(1 << 2);
46369 let a = _mm_set1_epi32(1 << 1);
46370 let b = _mm_set1_epi32(1 << 0);
46371 let r = _mm_mask_ternarylogic_epi32::<8>(src, 0, a, b);
46372 assert_eq_m128i(r, src);
46373 let r = _mm_mask_ternarylogic_epi32::<8>(src, 0b00001111, a, b);
46374 let e = _mm_set1_epi32(0);
46375 assert_eq_m128i(r, e);
46376 }
46377
46378 #[simd_test(enable = "avx512f,avx512vl")]
46379 unsafe fn test_mm_maskz_ternarylogic_epi32() {
46380 let a = _mm_set1_epi32(1 << 2);
46381 let b = _mm_set1_epi32(1 << 1);
46382 let c = _mm_set1_epi32(1 << 0);
46383 let r = _mm_maskz_ternarylogic_epi32::<9>(0, a, b, c);
46384 assert_eq_m128i(r, _mm_setzero_si128());
46385 let r = _mm_maskz_ternarylogic_epi32::<8>(0b00001111, a, b, c);
46386 let e = _mm_set1_epi32(0);
46387 assert_eq_m128i(r, e);
46388 }
46389
46390 #[simd_test(enable = "avx512f")]
46391 unsafe fn test_mm512_getmant_ps() {
46392 let a = _mm512_set1_ps(10.);
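// 10.0 = 1.25 * 2^3; normalizing the mantissa into the [0.75, 1.5) interval keeps 1.25.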
46393 let r = _mm512_getmant_ps::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
46394 let e = _mm512_set1_ps(1.25);
46395 assert_eq_m512(r, e);
46396 }
46397
46398 #[simd_test(enable = "avx512f")]
46399 unsafe fn test_mm512_mask_getmant_ps() {
46400 let a = _mm512_set1_ps(10.);
46401 let r = _mm512_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a);
46402 assert_eq_m512(r, a);
46403 let r = _mm512_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(
46404 a,
46405 0b11111111_00000000,
46406 a,
46407 );
46408 let e = _mm512_setr_ps(
46409 10., 10., 10., 10., 10., 10., 10., 10., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
46410 );
46411 assert_eq_m512(r, e);
46412 }
46413
46414 #[simd_test(enable = "avx512f")]
46415 unsafe fn test_mm512_maskz_getmant_ps() {
46416 let a = _mm512_set1_ps(10.);
46417 let r = _mm512_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a);
46418 assert_eq_m512(r, _mm512_setzero_ps());
46419 let r =
46420 _mm512_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b11111111_00000000, a);
46421 let e = _mm512_setr_ps(
46422 0., 0., 0., 0., 0., 0., 0., 0., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
46423 );
46424 assert_eq_m512(r, e);
46425 }
46426
46427 #[simd_test(enable = "avx512f,avx512vl")]
46428 unsafe fn test_mm256_getmant_ps() {
46429 let a = _mm256_set1_ps(10.);
46430 let r = _mm256_getmant_ps::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
46431 let e = _mm256_set1_ps(1.25);
46432 assert_eq_m256(r, e);
46433 }
46434
46435 #[simd_test(enable = "avx512f,avx512vl")]
46436 unsafe fn test_mm256_mask_getmant_ps() {
46437 let a = _mm256_set1_ps(10.);
46438 let r = _mm256_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a);
46439 assert_eq_m256(r, a);
46440 let r = _mm256_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b11111111, a);
46441 let e = _mm256_set1_ps(1.25);
46442 assert_eq_m256(r, e);
46443 }
46444
46445 #[simd_test(enable = "avx512f,avx512vl")]
46446 unsafe fn test_mm256_maskz_getmant_ps() {
46447 let a = _mm256_set1_ps(10.);
46448 let r = _mm256_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a);
46449 assert_eq_m256(r, _mm256_setzero_ps());
46450 let r = _mm256_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b11111111, a);
46451 let e = _mm256_set1_ps(1.25);
46452 assert_eq_m256(r, e);
46453 }
46454
46455 #[simd_test(enable = "avx512f,avx512vl")]
46456 unsafe fn test_mm_getmant_ps() {
46457 let a = _mm_set1_ps(10.);
46458 let r = _mm_getmant_ps::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
46459 let e = _mm_set1_ps(1.25);
46460 assert_eq_m128(r, e);
46461 }
46462
46463 #[simd_test(enable = "avx512f,avx512vl")]
46464 unsafe fn test_mm_mask_getmant_ps() {
46465 let a = _mm_set1_ps(10.);
46466 let r = _mm_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a);
46467 assert_eq_m128(r, a);
46468 let r = _mm_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b00001111, a);
46469 let e = _mm_set1_ps(1.25);
46470 assert_eq_m128(r, e);
46471 }
46472
46473 #[simd_test(enable = "avx512f,avx512vl")]
46474 unsafe fn test_mm_maskz_getmant_ps() {
46475 let a = _mm_set1_ps(10.);
46476 let r = _mm_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a);
46477 assert_eq_m128(r, _mm_setzero_ps());
46478 let r = _mm_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b00001111, a);
46479 let e = _mm_set1_ps(1.25);
46480 assert_eq_m128(r, e);
46481 }
46482
46483 #[simd_test(enable = "avx512f")]
46484 unsafe fn test_mm512_add_round_ps() {
46485 let a = _mm512_setr_ps(
46486 0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
46487 );
46488 let b = _mm512_set1_ps(-1.);
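// Only the last lane is inexact: 0.00000007 + (-1.) rounds to -0.99999994 under round-to-nearest but truncates to -0.9999999 under round-toward-zero.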
46489 let r = _mm512_add_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
46490 #[rustfmt::skip]
46491 let e = _mm512_setr_ps(
46492 -1., 0.5, 1., 2.5,
46493 3., 4.5, 5., 6.5,
46494 7., 8.5, 9., 10.5,
46495 11., 12.5, 13., -0.99999994,
46496 );
46497 assert_eq_m512(r, e);
46498 let r = _mm512_add_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
46499 let e = _mm512_setr_ps(
46500 -1., 0.5, 1., 2.5, 3., 4.5, 5., 6.5, 7., 8.5, 9., 10.5, 11., 12.5, 13., -0.9999999,
46501 );
46502 assert_eq_m512(r, e);
46503 }
46504
46505 #[simd_test(enable = "avx512f")]
46506 unsafe fn test_mm512_mask_add_round_ps() {
46507 let a = _mm512_setr_ps(
46508 0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
46509 );
46510 let b = _mm512_set1_ps(-1.);
46511 let r = _mm512_mask_add_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, 0, a, b);
46512 assert_eq_m512(r, a);
46513 let r = _mm512_mask_add_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46514 a,
46515 0b11111111_00000000,
46516 a,
46517 b,
46518 );
46519 #[rustfmt::skip]
46520 let e = _mm512_setr_ps(
46521 0., 1.5, 2., 3.5,
46522 4., 5.5, 6., 7.5,
46523 7., 8.5, 9., 10.5,
46524 11., 12.5, 13., -0.99999994,
46525 );
46526 assert_eq_m512(r, e);
46527 }
46528
46529 #[simd_test(enable = "avx512f")]
46530 unsafe fn test_mm512_maskz_add_round_ps() {
46531 let a = _mm512_setr_ps(
46532 0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
46533 );
46534 let b = _mm512_set1_ps(-1.);
46535 let r = _mm512_maskz_add_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
46536 assert_eq_m512(r, _mm512_setzero_ps());
46537 let r = _mm512_maskz_add_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46538 0b11111111_00000000,
46539 a,
46540 b,
46541 );
46542 #[rustfmt::skip]
46543 let e = _mm512_setr_ps(
46544 0., 0., 0., 0.,
46545 0., 0., 0., 0.,
46546 7., 8.5, 9., 10.5,
46547 11., 12.5, 13., -0.99999994,
46548 );
46549 assert_eq_m512(r, e);
46550 }
46551
46552 #[simd_test(enable = "avx512f")]
46553 unsafe fn test_mm512_sub_round_ps() {
46554 let a = _mm512_setr_ps(
46555 0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
46556 );
46557 let b = _mm512_set1_ps(1.);
46558 let r = _mm512_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
46559 #[rustfmt::skip]
46560 let e = _mm512_setr_ps(
46561 -1., 0.5, 1., 2.5,
46562 3., 4.5, 5., 6.5,
46563 7., 8.5, 9., 10.5,
46564 11., 12.5, 13., -0.99999994,
46565 );
46566 assert_eq_m512(r, e);
46567 let r = _mm512_sub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
46568 let e = _mm512_setr_ps(
46569 -1., 0.5, 1., 2.5, 3., 4.5, 5., 6.5, 7., 8.5, 9., 10.5, 11., 12.5, 13., -0.9999999,
46570 );
46571 assert_eq_m512(r, e);
46572 }
46573
46574 #[simd_test(enable = "avx512f")]
46575 unsafe fn test_mm512_mask_sub_round_ps() {
46576 let a = _mm512_setr_ps(
46577 0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
46578 );
46579 let b = _mm512_set1_ps(1.);
46580 let r = _mm512_mask_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46581 a, 0, a, b,
46582 );
46583 assert_eq_m512(r, a);
46584 let r = _mm512_mask_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46585 a,
46586 0b11111111_00000000,
46587 a,
46588 b,
46589 );
46590 #[rustfmt::skip]
46591 let e = _mm512_setr_ps(
46592 0., 1.5, 2., 3.5,
46593 4., 5.5, 6., 7.5,
46594 7., 8.5, 9., 10.5,
46595 11., 12.5, 13., -0.99999994,
46596 );
46597 assert_eq_m512(r, e);
46598 }
46599
46600 #[simd_test(enable = "avx512f")]
46601 unsafe fn test_mm512_maskz_sub_round_ps() {
46602 let a = _mm512_setr_ps(
46603 0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
46604 );
46605 let b = _mm512_set1_ps(1.);
46606 let r =
46607 _mm512_maskz_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
46608 assert_eq_m512(r, _mm512_setzero_ps());
46609 let r = _mm512_maskz_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46610 0b11111111_00000000,
46611 a,
46612 b,
46613 );
46614 #[rustfmt::skip]
46615 let e = _mm512_setr_ps(
46616 0., 0., 0., 0.,
46617 0., 0., 0., 0.,
46618 7., 8.5, 9., 10.5,
46619 11., 12.5, 13., -0.99999994,
46620 );
46621 assert_eq_m512(r, e);
46622 }
46623
46624 #[simd_test(enable = "avx512f")]
46625 unsafe fn test_mm512_mul_round_ps() {
46626 #[rustfmt::skip]
46627 let a = _mm512_setr_ps(
46628 0., 1.5, 2., 3.5,
46629 4., 5.5, 6., 7.5,
46630 8., 9.5, 10., 11.5,
46631 12., 13.5, 14., 0.00000000000000000000007,
46632 );
46633 let b = _mm512_set1_ps(0.1);
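// 0.1 is not exactly representable in f32, so several products differ in the last bit between round-to-nearest and round-toward-zero.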
46634 let r = _mm512_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
46635 #[rustfmt::skip]
46636 let e = _mm512_setr_ps(
46637 0., 0.15, 0.2, 0.35,
46638 0.4, 0.55, 0.6, 0.75,
46639 0.8, 0.95, 1.0, 1.15,
46640 1.2, 1.35, 1.4, 0.000000000000000000000007000001,
46641 );
46642 assert_eq_m512(r, e);
46643 let r = _mm512_mul_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
46644 #[rustfmt::skip]
46645 let e = _mm512_setr_ps(
46646 0., 0.14999999, 0.2, 0.35,
46647 0.4, 0.54999995, 0.59999996, 0.75,
46648 0.8, 0.95, 1.0, 1.15,
46649 1.1999999, 1.3499999, 1.4, 0.000000000000000000000007,
46650 );
46651 assert_eq_m512(r, e);
46652 }
46653
46654 #[simd_test(enable = "avx512f")]
46655 unsafe fn test_mm512_mask_mul_round_ps() {
46656 #[rustfmt::skip]
46657 let a = _mm512_setr_ps(
46658 0., 1.5, 2., 3.5,
46659 4., 5.5, 6., 7.5,
46660 8., 9.5, 10., 11.5,
46661 12., 13.5, 14., 0.00000000000000000000007,
46662 );
46663 let b = _mm512_set1_ps(0.1);
46664 let r = _mm512_mask_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46665 a, 0, a, b,
46666 );
46667 assert_eq_m512(r, a);
46668 let r = _mm512_mask_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46669 a,
46670 0b11111111_00000000,
46671 a,
46672 b,
46673 );
46674 #[rustfmt::skip]
46675 let e = _mm512_setr_ps(
46676 0., 1.5, 2., 3.5,
46677 4., 5.5, 6., 7.5,
46678 0.8, 0.95, 1.0, 1.15,
46679 1.2, 1.35, 1.4, 0.000000000000000000000007000001,
46680 );
46681 assert_eq_m512(r, e);
46682 }
46683
46684 #[simd_test(enable = "avx512f")]
46685 unsafe fn test_mm512_maskz_mul_round_ps() {
46686 #[rustfmt::skip]
46687 let a = _mm512_setr_ps(
46688 0., 1.5, 2., 3.5,
46689 4., 5.5, 6., 7.5,
46690 8., 9.5, 10., 11.5,
46691 12., 13.5, 14., 0.00000000000000000000007,
46692 );
46693 let b = _mm512_set1_ps(0.1);
46694 let r =
46695 _mm512_maskz_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
46696 assert_eq_m512(r, _mm512_setzero_ps());
46697 let r = _mm512_maskz_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46698 0b11111111_00000000,
46699 a,
46700 b,
46701 );
46702 #[rustfmt::skip]
46703 let e = _mm512_setr_ps(
46704 0., 0., 0., 0.,
46705 0., 0., 0., 0.,
46706 0.8, 0.95, 1.0, 1.15,
46707 1.2, 1.35, 1.4, 0.000000000000000000000007000001,
46708 );
46709 assert_eq_m512(r, e);
46710 }
46711
46712 #[simd_test(enable = "avx512f")]
46713 unsafe fn test_mm512_div_round_ps() {
46714 let a = _mm512_set1_ps(1.);
46715 let b = _mm512_set1_ps(3.);
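// 1/3 rounds up to 0.33333334 under round-to-nearest and truncates to 0.3333333 under round-toward-zero.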
46716 let r = _mm512_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
46717 let e = _mm512_set1_ps(0.33333334);
46718 assert_eq_m512(r, e);
46719 let r = _mm512_div_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
46720 let e = _mm512_set1_ps(0.3333333);
46721 assert_eq_m512(r, e);
46722 }
46723
46724 #[simd_test(enable = "avx512f")]
46725 unsafe fn test_mm512_mask_div_round_ps() {
46726 let a = _mm512_set1_ps(1.);
46727 let b = _mm512_set1_ps(3.);
46728 let r = _mm512_mask_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46729 a, 0, a, b,
46730 );
46731 assert_eq_m512(r, a);
46732 let r = _mm512_mask_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46733 a,
46734 0b11111111_00000000,
46735 a,
46736 b,
46737 );
46738 let e = _mm512_setr_ps(
46739 1., 1., 1., 1., 1., 1., 1., 1., 0.33333334, 0.33333334, 0.33333334, 0.33333334,
46740 0.33333334, 0.33333334, 0.33333334, 0.33333334,
46741 );
46742 assert_eq_m512(r, e);
46743 }
46744
46745 #[simd_test(enable = "avx512f")]
46746 unsafe fn test_mm512_maskz_div_round_ps() {
46747 let a = _mm512_set1_ps(1.);
46748 let b = _mm512_set1_ps(3.);
46749 let r =
46750 _mm512_maskz_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
46751 assert_eq_m512(r, _mm512_setzero_ps());
46752 let r = _mm512_maskz_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46753 0b11111111_00000000,
46754 a,
46755 b,
46756 );
46757 let e = _mm512_setr_ps(
46758 0., 0., 0., 0., 0., 0., 0., 0., 0.33333334, 0.33333334, 0.33333334, 0.33333334,
46759 0.33333334, 0.33333334, 0.33333334, 0.33333334,
46760 );
46761 assert_eq_m512(r, e);
46762 }
46763
46764 #[simd_test(enable = "avx512f")]
46765 unsafe fn test_mm512_sqrt_round_ps() {
46766 let a = _mm512_set1_ps(3.);
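// sqrt(3) ≈ 1.7320508; rounding toward +infinity yields the next representable f32, 1.7320509.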
46767 let r = _mm512_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
46768 let e = _mm512_set1_ps(1.7320508);
46769 assert_eq_m512(r, e);
46770 let r = _mm512_sqrt_round_ps::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a);
46771 let e = _mm512_set1_ps(1.7320509);
46772 assert_eq_m512(r, e);
46773 }
46774
46775 #[simd_test(enable = "avx512f")]
46776 unsafe fn test_mm512_mask_sqrt_round_ps() {
46777 let a = _mm512_set1_ps(3.);
46778 let r =
46779 _mm512_mask_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, 0, a);
46780 assert_eq_m512(r, a);
46781 let r = _mm512_mask_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46782 a,
46783 0b11111111_00000000,
46784 a,
46785 );
46786 let e = _mm512_setr_ps(
46787 3., 3., 3., 3., 3., 3., 3., 3., 1.7320508, 1.7320508, 1.7320508, 1.7320508, 1.7320508,
46788 1.7320508, 1.7320508, 1.7320508,
46789 );
46790 assert_eq_m512(r, e);
46791 }
46792
46793 #[simd_test(enable = "avx512f")]
46794 unsafe fn test_mm512_maskz_sqrt_round_ps() {
46795 let a = _mm512_set1_ps(3.);
46796 let r =
46797 _mm512_maskz_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a);
46798 assert_eq_m512(r, _mm512_setzero_ps());
46799 let r = _mm512_maskz_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46800 0b11111111_00000000,
46801 a,
46802 );
46803 let e = _mm512_setr_ps(
46804 0., 0., 0., 0., 0., 0., 0., 0., 1.7320508, 1.7320508, 1.7320508, 1.7320508, 1.7320508,
46805 1.7320508, 1.7320508, 1.7320508,
46806 );
46807 assert_eq_m512(r, e);
46808 }
46809
46810 #[simd_test(enable = "avx512f")]
46811 unsafe fn test_mm512_fmadd_round_ps() {
46812 let a = _mm512_set1_ps(0.00000007);
46813 let b = _mm512_set1_ps(1.);
46814 let c = _mm512_set1_ps(-1.);
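// The fused multiply-add evaluates 0.00000007 * 1. + (-1.) with a single final rounding: -0.99999994 (nearest) vs -0.9999999 (toward zero).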
46815 let r = _mm512_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
46816 let e = _mm512_set1_ps(-0.99999994);
46817 assert_eq_m512(r, e);
46818 let r = _mm512_fmadd_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
46819 let e = _mm512_set1_ps(-0.9999999);
46820 assert_eq_m512(r, e);
46821 }
46822
46823 #[simd_test(enable = "avx512f")]
46824 unsafe fn test_mm512_mask_fmadd_round_ps() {
46825 let a = _mm512_set1_ps(0.00000007);
46826 let b = _mm512_set1_ps(1.);
46827 let c = _mm512_set1_ps(-1.);
46828 let r = _mm512_mask_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46829 a, 0, b, c,
46830 );
46831 assert_eq_m512(r, a);
46832 let r = _mm512_mask_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46833 a,
46834 0b00000000_11111111,
46835 b,
46836 c,
46837 );
46838 #[rustfmt::skip]
46839 let e = _mm512_setr_ps(
46840 -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46841 -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46842 0.00000007, 0.00000007, 0.00000007, 0.00000007,
46843 0.00000007, 0.00000007, 0.00000007, 0.00000007,
46844 );
46845 assert_eq_m512(r, e);
46846 }
46847
46848 #[simd_test(enable = "avx512f")]
46849 unsafe fn test_mm512_maskz_fmadd_round_ps() {
46850 let a = _mm512_set1_ps(0.00000007);
46851 let b = _mm512_set1_ps(1.);
46852 let c = _mm512_set1_ps(-1.);
46853 let r = _mm512_maskz_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46854 0, a, b, c,
46855 );
46856 assert_eq_m512(r, _mm512_setzero_ps());
46857 #[rustfmt::skip]
46858 let r = _mm512_maskz_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46859 0b00000000_11111111,
46860 a,
46861 b,
46862 c,
46863 );
46864 #[rustfmt::skip]
46865 let e = _mm512_setr_ps(
46866 -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46867 -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46868 0., 0., 0., 0.,
46869 0., 0., 0., 0.,
46870 );
46871 assert_eq_m512(r, e);
46872 }
46873
46874 #[simd_test(enable = "avx512f")]
46875 unsafe fn test_mm512_mask3_fmadd_round_ps() {
46876 let a = _mm512_set1_ps(0.00000007);
46877 let b = _mm512_set1_ps(1.);
46878 let c = _mm512_set1_ps(-1.);
46879 let r = _mm512_mask3_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46880 a, b, c, 0,
46881 );
46882 assert_eq_m512(r, c);
46883 let r = _mm512_mask3_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46884 a,
46885 b,
46886 c,
46887 0b00000000_11111111,
46888 );
46889 #[rustfmt::skip]
46890 let e = _mm512_setr_ps(
46891 -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46892 -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46893 -1., -1., -1., -1.,
46894 -1., -1., -1., -1.,
46895 );
46896 assert_eq_m512(r, e);
46897 }
46898
46899 #[simd_test(enable = "avx512f")]
46900 unsafe fn test_mm512_fmsub_round_ps() {
46901 let a = _mm512_set1_ps(0.00000007);
46902 let b = _mm512_set1_ps(1.);
46903 let c = _mm512_set1_ps(1.);
46904 let r = _mm512_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
46905 let e = _mm512_set1_ps(-0.99999994);
46906 assert_eq_m512(r, e);
46907 let r = _mm512_fmsub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
46908 let e = _mm512_set1_ps(-0.9999999);
46909 assert_eq_m512(r, e);
46910 }
46911
46912 #[simd_test(enable = "avx512f")]
46913 unsafe fn test_mm512_mask_fmsub_round_ps() {
46914 let a = _mm512_set1_ps(0.00000007);
46915 let b = _mm512_set1_ps(1.);
46916 let c = _mm512_set1_ps(1.);
46917 let r = _mm512_mask_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46918 a, 0, b, c,
46919 );
46920 assert_eq_m512(r, a);
46921 let r = _mm512_mask_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46922 a,
46923 0b00000000_11111111,
46924 b,
46925 c,
46926 );
46927 #[rustfmt::skip]
46928 let e = _mm512_setr_ps(
46929 -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46930 -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46931 0.00000007, 0.00000007, 0.00000007, 0.00000007,
46932 0.00000007, 0.00000007, 0.00000007, 0.00000007,
46933 );
46934 assert_eq_m512(r, e);
46935 }
46936
46937 #[simd_test(enable = "avx512f")]
46938 unsafe fn test_mm512_maskz_fmsub_round_ps() {
46939 let a = _mm512_set1_ps(0.00000007);
46940 let b = _mm512_set1_ps(1.);
46941 let c = _mm512_set1_ps(1.);
46942 let r = _mm512_maskz_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46943 0, a, b, c,
46944 );
46945 assert_eq_m512(r, _mm512_setzero_ps());
46946 let r = _mm512_maskz_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46947 0b00000000_11111111,
46948 a,
46949 b,
46950 c,
46951 );
46952 #[rustfmt::skip]
46953 let e = _mm512_setr_ps(
46954 -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46955 -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46956 0., 0., 0., 0.,
46957 0., 0., 0., 0.,
46958 );
46959 assert_eq_m512(r, e);
46960 }
46961
46962 #[simd_test(enable = "avx512f")]
46963 unsafe fn test_mm512_mask3_fmsub_round_ps() {
46964 let a = _mm512_set1_ps(0.00000007);
46965 let b = _mm512_set1_ps(1.);
46966 let c = _mm512_set1_ps(1.);
46967 let r = _mm512_mask3_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46968 a, b, c, 0,
46969 );
46970 assert_eq_m512(r, c);
46971 let r = _mm512_mask3_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46972 a,
46973 b,
46974 c,
46975 0b00000000_11111111,
46976 );
46977 #[rustfmt::skip]
46978 let e = _mm512_setr_ps(
46979 -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46980 -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46981 1., 1., 1., 1.,
46982 1., 1., 1., 1.,
46983 );
46984 assert_eq_m512(r, e);
46985 }
46986
46987 #[simd_test(enable = "avx512f")]
46988 unsafe fn test_mm512_fmaddsub_round_ps() {
46989 let a = _mm512_set1_ps(0.00000007);
46990 let b = _mm512_set1_ps(1.);
46991 let c = _mm512_set1_ps(-1.);
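// fmaddsub alternates per lane: even-indexed lanes compute a * b - c, odd-indexed lanes compute a * b + c.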
46992 let r =
46993 _mm512_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
46994 #[rustfmt::skip]
46995 let e = _mm512_setr_ps(
46996 1.0000001, -0.99999994, 1.0000001, -0.99999994,
46997 1.0000001, -0.99999994, 1.0000001, -0.99999994,
46998 1.0000001, -0.99999994, 1.0000001, -0.99999994,
46999 1.0000001, -0.99999994, 1.0000001, -0.99999994,
47000 );
47001 assert_eq_m512(r, e);
47002 let r = _mm512_fmaddsub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
47003 let e = _mm512_setr_ps(
47004 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1.,
47005 -0.9999999, 1., -0.9999999, 1., -0.9999999,
47006 );
47007 assert_eq_m512(r, e);
47008 }
47009
47010 #[simd_test(enable = "avx512f")]
47011 unsafe fn test_mm512_mask_fmaddsub_round_ps() {
47012 let a = _mm512_set1_ps(0.00000007);
47013 let b = _mm512_set1_ps(1.);
47014 let c = _mm512_set1_ps(-1.);
47015 let r = _mm512_mask_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47016 a, 0, b, c,
47017 );
47018 assert_eq_m512(r, a);
47019 let r = _mm512_mask_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47020 a,
47021 0b00000000_11111111,
47022 b,
47023 c,
47024 );
47025 #[rustfmt::skip]
47026 let e = _mm512_setr_ps(
47027 1.0000001, -0.99999994, 1.0000001, -0.99999994,
47028 1.0000001, -0.99999994, 1.0000001, -0.99999994,
47029 0.00000007, 0.00000007, 0.00000007, 0.00000007,
47030 0.00000007, 0.00000007, 0.00000007, 0.00000007,
47031 );
47032 assert_eq_m512(r, e);
47033 }
47034
47035 #[simd_test(enable = "avx512f")]
47036 unsafe fn test_mm512_maskz_fmaddsub_round_ps() {
47037 let a = _mm512_set1_ps(0.00000007);
47038 let b = _mm512_set1_ps(1.);
47039 let c = _mm512_set1_ps(-1.);
47040 let r = _mm512_maskz_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47041 0, a, b, c,
47042 );
47043 assert_eq_m512(r, _mm512_setzero_ps());
47044 let r = _mm512_maskz_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47045 0b00000000_11111111,
47046 a,
47047 b,
47048 c,
47049 );
47050 #[rustfmt::skip]
47051 let e = _mm512_setr_ps(
47052 1.0000001, -0.99999994, 1.0000001, -0.99999994,
47053 1.0000001, -0.99999994, 1.0000001, -0.99999994,
47054 0., 0., 0., 0.,
47055 0., 0., 0., 0.,
47056 );
47057 assert_eq_m512(r, e);
47058 }
47059
47060 #[simd_test(enable = "avx512f")]
47061 unsafe fn test_mm512_mask3_fmaddsub_round_ps() {
47062 let a = _mm512_set1_ps(0.00000007);
47063 let b = _mm512_set1_ps(1.);
47064 let c = _mm512_set1_ps(-1.);
47065 let r = _mm512_mask3_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47066 a, b, c, 0,
47067 );
47068 assert_eq_m512(r, c);
47069 let r = _mm512_mask3_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47070 a,
47071 b,
47072 c,
47073 0b00000000_11111111,
47074 );
47075 #[rustfmt::skip]
47076 let e = _mm512_setr_ps(
47077 1.0000001, -0.99999994, 1.0000001, -0.99999994,
47078 1.0000001, -0.99999994, 1.0000001, -0.99999994,
47079 -1., -1., -1., -1.,
47080 -1., -1., -1., -1.,
47081 );
47082 assert_eq_m512(r, e);
47083 }
47084
47085 #[simd_test(enable = "avx512f")]
47086 unsafe fn test_mm512_fmsubadd_round_ps() {
47087 let a = _mm512_set1_ps(0.00000007);
47088 let b = _mm512_set1_ps(1.);
47089 let c = _mm512_set1_ps(-1.);
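// fmsubadd is the mirror of fmaddsub: even-indexed lanes compute a * b + c, odd-indexed lanes compute a * b - c.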
47090 let r =
47091 _mm512_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
47092 #[rustfmt::skip]
47093 let e = _mm512_setr_ps(
47094 -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47095 -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47096 -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47097 -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47098 );
47099 assert_eq_m512(r, e);
47100 let r = _mm512_fmsubadd_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
47101 let e = _mm512_setr_ps(
47102 -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1.,
47103 -0.9999999, 1., -0.9999999, 1., -0.9999999, 1.,
47104 );
47105 assert_eq_m512(r, e);
47106 }
47107
47108 #[simd_test(enable = "avx512f")]
47109 unsafe fn test_mm512_mask_fmsubadd_round_ps() {
47110 let a = _mm512_set1_ps(0.00000007);
47111 let b = _mm512_set1_ps(1.);
47112 let c = _mm512_set1_ps(-1.);
47113 let r = _mm512_mask_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47114 a, 0, b, c,
47115 );
47116 assert_eq_m512(r, a);
47117 let r = _mm512_mask_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47118 a,
47119 0b00000000_11111111,
47120 b,
47121 c,
47122 );
47123 #[rustfmt::skip]
47124 let e = _mm512_setr_ps(
47125 -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47126 -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47127 0.00000007, 0.00000007, 0.00000007, 0.00000007,
47128 0.00000007, 0.00000007, 0.00000007, 0.00000007,
47129 );
47130 assert_eq_m512(r, e);
47131 }
47132
47133 #[simd_test(enable = "avx512f")]
47134 unsafe fn test_mm512_maskz_fmsubadd_round_ps() {
47135 let a = _mm512_set1_ps(0.00000007);
47136 let b = _mm512_set1_ps(1.);
47137 let c = _mm512_set1_ps(-1.);
47138 let r = _mm512_maskz_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47139 0, a, b, c,
47140 );
47141 assert_eq_m512(r, _mm512_setzero_ps());
47142 let r = _mm512_maskz_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47143 0b00000000_11111111,
47144 a,
47145 b,
47146 c,
47147 );
47148 #[rustfmt::skip]
47149 let e = _mm512_setr_ps(
47150 -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47151 -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47152 0., 0., 0., 0.,
47153 0., 0., 0., 0.,
47154 );
47155 assert_eq_m512(r, e);
47156 }
47157
47158 #[simd_test(enable = "avx512f")]
47159 unsafe fn test_mm512_mask3_fmsubadd_round_ps() {
47160 let a = _mm512_set1_ps(0.00000007);
47161 let b = _mm512_set1_ps(1.);
47162 let c = _mm512_set1_ps(-1.);
47163 let r = _mm512_mask3_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47164 a, b, c, 0,
47165 );
47166 assert_eq_m512(r, c);
47167 let r = _mm512_mask3_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47168 a,
47169 b,
47170 c,
47171 0b00000000_11111111,
47172 );
47173 #[rustfmt::skip]
47174 let e = _mm512_setr_ps(
47175 -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47176 -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47177 -1., -1., -1., -1.,
47178 -1., -1., -1., -1.,
47179 );
47180 assert_eq_m512(r, e);
47181 }
47182
47183 #[simd_test(enable = "avx512f")]
47184 unsafe fn test_mm512_fnmadd_round_ps() {
47185 let a = _mm512_set1_ps(0.00000007);
47186 let b = _mm512_set1_ps(1.);
47187 let c = _mm512_set1_ps(1.);
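// fnmadd computes -(a * b) + c = 1. - 0.00000007, rounded once: 0.99999994 (nearest) vs 0.9999999 (toward zero).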
47188 let r =
47189 _mm512_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
47190 let e = _mm512_set1_ps(0.99999994);
47191 assert_eq_m512(r, e);
47192 let r = _mm512_fnmadd_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
47193 let e = _mm512_set1_ps(0.9999999);
47194 assert_eq_m512(r, e);
47195 }
47196
47197 #[simd_test(enable = "avx512f")]
47198 unsafe fn test_mm512_mask_fnmadd_round_ps() {
47199 let a = _mm512_set1_ps(0.00000007);
47200 let b = _mm512_set1_ps(1.);
47201 let c = _mm512_set1_ps(1.);
47202 let r = _mm512_mask_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47203 a, 0, b, c,
47204 );
47205 assert_eq_m512(r, a);
47206 let r = _mm512_mask_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47207 a,
47208 0b00000000_11111111,
47209 b,
47210 c,
47211 );
47212 let e = _mm512_setr_ps(
47213 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
47214 0.99999994, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007,
47215 0.00000007, 0.00000007,
47216 );
47217 assert_eq_m512(r, e);
47218 }
47219
47220 #[simd_test(enable = "avx512f")]
47221 unsafe fn test_mm512_maskz_fnmadd_round_ps() {
47222 let a = _mm512_set1_ps(0.00000007);
47223 let b = _mm512_set1_ps(1.);
47224 let c = _mm512_set1_ps(1.);
47225 let r = _mm512_maskz_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47226 0, a, b, c,
47227 );
47228 assert_eq_m512(r, _mm512_setzero_ps());
47229 let r = _mm512_maskz_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47230 0b00000000_11111111,
47231 a,
47232 b,
47233 c,
47234 );
47235 let e = _mm512_setr_ps(
47236 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
47237 0.99999994, 0., 0., 0., 0., 0., 0., 0., 0.,
47238 );
47239 assert_eq_m512(r, e);
47240 }
47241
47242 #[simd_test(enable = "avx512f")]
47243 unsafe fn test_mm512_mask3_fnmadd_round_ps() {
47244 let a = _mm512_set1_ps(0.00000007);
47245 let b = _mm512_set1_ps(1.);
47246 let c = _mm512_set1_ps(1.);
47247 let r = _mm512_mask3_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47248 a, b, c, 0,
47249 );
47250 assert_eq_m512(r, c);
47251 let r = _mm512_mask3_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47252 a,
47253 b,
47254 c,
47255 0b00000000_11111111,
47256 );
47257 let e = _mm512_setr_ps(
47258 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
47259 0.99999994, 1., 1., 1., 1., 1., 1., 1., 1.,
47260 );
47261 assert_eq_m512(r, e);
47262 }
47263
47264 #[simd_test(enable = "avx512f")]
47265 unsafe fn test_mm512_fnmsub_round_ps() {
47266 let a = _mm512_set1_ps(0.00000007);
47267 let b = _mm512_set1_ps(1.);
47268 let c = _mm512_set1_ps(-1.);
47269 let r =
47270 _mm512_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
47271 let e = _mm512_set1_ps(0.99999994);
47272 assert_eq_m512(r, e);
47273 let r = _mm512_fnmsub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
47274 let e = _mm512_set1_ps(0.9999999);
47275 assert_eq_m512(r, e);
47276 }
47277
47278 #[simd_test(enable = "avx512f")]
47279 unsafe fn test_mm512_mask_fnmsub_round_ps() {
47280 let a = _mm512_set1_ps(0.00000007);
47281 let b = _mm512_set1_ps(1.);
47282 let c = _mm512_set1_ps(-1.);
47283 let r = _mm512_mask_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47284 a, 0, b, c,
47285 );
47286 assert_eq_m512(r, a);
47287 let r = _mm512_mask_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47288 a,
47289 0b00000000_11111111,
47290 b,
47291 c,
47292 );
47293 let e = _mm512_setr_ps(
47294 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
47295 0.99999994, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007,
47296 0.00000007, 0.00000007,
47297 );
47298 assert_eq_m512(r, e);
47299 }
47300
47301 #[simd_test(enable = "avx512f")]
47302 unsafe fn test_mm512_maskz_fnmsub_round_ps() {
47303 let a = _mm512_set1_ps(0.00000007);
47304 let b = _mm512_set1_ps(1.);
47305 let c = _mm512_set1_ps(-1.);
47306 let r = _mm512_maskz_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47307 0, a, b, c,
47308 );
47309 assert_eq_m512(r, _mm512_setzero_ps());
47310 let r = _mm512_maskz_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47311 0b00000000_11111111,
47312 a,
47313 b,
47314 c,
47315 );
47316 let e = _mm512_setr_ps(
47317 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
47318 0.99999994, 0., 0., 0., 0., 0., 0., 0., 0.,
47319 );
47320 assert_eq_m512(r, e);
47321 }
47322
47323 #[simd_test(enable = "avx512f")]
47324 unsafe fn test_mm512_mask3_fnmsub_round_ps() {
47325 let a = _mm512_set1_ps(0.00000007);
47326 let b = _mm512_set1_ps(1.);
47327 let c = _mm512_set1_ps(-1.);
47328 let r = _mm512_mask3_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47329 a, b, c, 0,
47330 );
47331 assert_eq_m512(r, c);
47332 let r = _mm512_mask3_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47333 a,
47334 b,
47335 c,
47336 0b00000000_11111111,
47337 );
47338 let e = _mm512_setr_ps(
47339 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
47340 0.99999994, -1., -1., -1., -1., -1., -1., -1., -1.,
47341 );
47342 assert_eq_m512(r, e);
47343 }
47344
47345 #[simd_test(enable = "avx512f")]
47346 unsafe fn test_mm512_max_round_ps() {
47347 let a = _mm512_setr_ps(
47348 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47349 );
47350 let b = _mm512_setr_ps(
47351 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
47352 );
47353 let r = _mm512_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, b);
47354 let e = _mm512_setr_ps(
47355 15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
47356 );
47357 assert_eq_m512(r, e);
47358 }
47359
47360 #[simd_test(enable = "avx512f")]
47361 unsafe fn test_mm512_mask_max_round_ps() {
47362 let a = _mm512_setr_ps(
47363 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47364 );
47365 let b = _mm512_setr_ps(
47366 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
47367 );
47368 let r = _mm512_mask_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
47369 assert_eq_m512(r, a);
47370 let r = _mm512_mask_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0b00000000_11111111, a, b);
47371 let e = _mm512_setr_ps(
47372 15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
47373 );
47374 assert_eq_m512(r, e);
47375 }
47376
47377 #[simd_test(enable = "avx512f")]
47378 unsafe fn test_mm512_maskz_max_round_ps() {
47379 let a = _mm512_setr_ps(
47380 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47381 );
47382 let b = _mm512_setr_ps(
47383 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
47384 );
47385 let r = _mm512_maskz_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
47386 assert_eq_m512(r, _mm512_setzero_ps());
47387 let r = _mm512_maskz_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(0b00000000_11111111, a, b);
47388 let e = _mm512_setr_ps(
47389 15., 14., 13., 12., 11., 10., 9., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
47390 );
47391 assert_eq_m512(r, e);
47392 }
47393
47394 #[simd_test(enable = "avx512f")]
47395 unsafe fn test_mm512_min_round_ps() {
47396 let a = _mm512_setr_ps(
47397 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47398 );
47399 let b = _mm512_setr_ps(
47400 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
47401 );
47402 let r = _mm512_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, b);
47403 let e = _mm512_setr_ps(
47404 0., 1., 2., 3., 4., 5., 6., 7., 7., 6., 5., 4., 3., 2., 1., 0.,
47405 );
47406 assert_eq_m512(r, e);
47407 }
47408
47409 #[simd_test(enable = "avx512f")]
47410 unsafe fn test_mm512_mask_min_round_ps() {
47411 let a = _mm512_setr_ps(
47412 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47413 );
47414 let b = _mm512_setr_ps(
47415 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
47416 );
47417 let r = _mm512_mask_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
47418 assert_eq_m512(r, a);
47419 let r = _mm512_mask_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0b00000000_11111111, a, b);
47420 let e = _mm512_setr_ps(
47421 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47422 );
47423 assert_eq_m512(r, e);
47424 }
47425
47426 #[simd_test(enable = "avx512f")]
47427 unsafe fn test_mm512_maskz_min_round_ps() {
47428 let a = _mm512_setr_ps(
47429 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47430 );
47431 let b = _mm512_setr_ps(
47432 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
47433 );
47434 let r = _mm512_maskz_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
47435 assert_eq_m512(r, _mm512_setzero_ps());
47436 let r = _mm512_maskz_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(0b00000000_11111111, a, b);
47437 let e = _mm512_setr_ps(
47438 0., 1., 2., 3., 4., 5., 6., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
47439 );
47440 assert_eq_m512(r, e);
47441 }
47442
47443 #[simd_test(enable = "avx512f")]
47444 unsafe fn test_mm512_getexp_round_ps() {
47445 let a = _mm512_set1_ps(3.);
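// getexp returns floor(log2(|a|)) as a float: floor(log2(3.)) = 1.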
47446 let r = _mm512_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(a);
47447 let e = _mm512_set1_ps(1.);
47448 assert_eq_m512(r, e);
47449 }
47450
47451 #[simd_test(enable = "avx512f")]
47452 unsafe fn test_mm512_mask_getexp_round_ps() {
47453 let a = _mm512_set1_ps(3.);
47454 let r = _mm512_mask_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0, a);
47455 assert_eq_m512(r, a);
47456 let r = _mm512_mask_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111_00000000, a);
47457 let e = _mm512_setr_ps(
47458 3., 3., 3., 3., 3., 3., 3., 3., 1., 1., 1., 1., 1., 1., 1., 1.,
47459 );
47460 assert_eq_m512(r, e);
47461 }
47462
47463 #[simd_test(enable = "avx512f")]
47464 unsafe fn test_mm512_maskz_getexp_round_ps() {
47465 let a = _mm512_set1_ps(3.);
47466 let r = _mm512_maskz_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(0, a);
47467 assert_eq_m512(r, _mm512_setzero_ps());
47468 let r = _mm512_maskz_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(0b11111111_00000000, a);
47469 let e = _mm512_setr_ps(
47470 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
47471 );
47472 assert_eq_m512(r, e);
47473 }
47474
47475 #[simd_test(enable = "avx512f")]
47476 unsafe fn test_mm512_roundscale_round_ps() {
47477 let a = _mm512_set1_ps(1.1);
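// IMM8 = 0 requests zero fraction bits with round-to-nearest, so 1.1 is rounded to the integer 1.0.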
47478 let r = _mm512_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(a);
47479 let e = _mm512_set1_ps(1.0);
47480 assert_eq_m512(r, e);
47481 }
47482
47483 #[simd_test(enable = "avx512f")]
47484 unsafe fn test_mm512_mask_roundscale_round_ps() {
47485 let a = _mm512_set1_ps(1.1);
47486 let r = _mm512_mask_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(a, 0, a);
47487 let e = _mm512_set1_ps(1.1);
47488 assert_eq_m512(r, e);
47489 let r = _mm512_mask_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(
47490 a,
47491 0b11111111_11111111,
47492 a,
47493 );
47494 let e = _mm512_set1_ps(1.0);
47495 assert_eq_m512(r, e);
47496 }
47497
47498 #[simd_test(enable = "avx512f")]
47499 unsafe fn test_mm512_maskz_roundscale_round_ps() {
47500 let a = _mm512_set1_ps(1.1);
47501 let r = _mm512_maskz_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(0, a);
47502 assert_eq_m512(r, _mm512_setzero_ps());
47503 let r =
47504 _mm512_maskz_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(0b11111111_11111111, a);
47505 let e = _mm512_set1_ps(1.0);
47506 assert_eq_m512(r, e);
47507 }
47508
47509 #[simd_test(enable = "avx512f")]
47510 unsafe fn test_mm512_scalef_round_ps() {
47511 let a = _mm512_set1_ps(1.);
47512 let b = _mm512_set1_ps(3.);
47513 let r = _mm512_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
47514 let e = _mm512_set1_ps(8.);
47515 assert_eq_m512(r, e);
47516 }
47517
47518 #[simd_test(enable = "avx512f")]
47519 unsafe fn test_mm512_mask_scalef_round_ps() {
47520 let a = _mm512_set1_ps(1.);
47521 let b = _mm512_set1_ps(3.);
47522 let r = _mm512_mask_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47523 a, 0, a, b,
47524 );
47525 assert_eq_m512(r, a);
47526 let r = _mm512_mask_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47527 a,
47528 0b11111111_00000000,
47529 a,
47530 b,
47531 );
47532 let e = _mm512_set_ps(
47533 8., 8., 8., 8., 8., 8., 8., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
47534 );
47535 assert_eq_m512(r, e);
47536 }
47537
47538 #[simd_test(enable = "avx512f")]
47539 unsafe fn test_mm512_maskz_scalef_round_ps() {
47540 let a = _mm512_set1_ps(1.);
47541 let b = _mm512_set1_ps(3.);
47542 let r = _mm512_maskz_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47543 0, a, b,
47544 );
47545 assert_eq_m512(r, _mm512_setzero_ps());
47546 let r = _mm512_maskz_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47547 0b11111111_00000000,
47548 a,
47549 b,
47550 );
47551 let e = _mm512_set_ps(
47552 8., 8., 8., 8., 8., 8., 8., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
47553 );
47554 assert_eq_m512(r, e);
47555 }
47556
47557 #[simd_test(enable = "avx512f")]
47558 unsafe fn test_mm512_fixupimm_round_ps() {
47559 let a = _mm512_set1_ps(f32::NAN);
47560 let b = _mm512_set1_ps(f32::MAX);
47561 let c = _mm512_set1_epi32(i32::MAX);
47562 let r = _mm512_fixupimm_round_ps::<5, _MM_FROUND_CUR_DIRECTION>(a, b, c);
47563 let e = _mm512_set1_ps(0.0);
47564 assert_eq_m512(r, e);
47565 }
47566
47567 #[simd_test(enable = "avx512f")]
47568 unsafe fn test_mm512_mask_fixupimm_round_ps() {
47569 #[rustfmt::skip]
47570 let a = _mm512_set_ps(
47571 f32::NAN, f32::NAN, f32::NAN, f32::NAN,
47572 f32::NAN, f32::NAN, f32::NAN, f32::NAN,
47573 1., 1., 1., 1.,
47574 1., 1., 1., 1.,
47575 );
47576 let b = _mm512_set1_ps(f32::MAX);
47577 let c = _mm512_set1_epi32(i32::MAX);
47578 let r = _mm512_mask_fixupimm_round_ps::<5, _MM_FROUND_CUR_DIRECTION>(
47579 a,
47580 0b11111111_00000000,
47581 b,
47582 c,
47583 );
47584 let e = _mm512_set_ps(
47585 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
47586 );
47587 assert_eq_m512(r, e);
47588 }
47589
47590 #[simd_test(enable = "avx512f")]
47591 unsafe fn test_mm512_maskz_fixupimm_round_ps() {
47592 #[rustfmt::skip]
47593 let a = _mm512_set_ps(
47594 f32::NAN, f32::NAN, f32::NAN, f32::NAN,
47595 f32::NAN, f32::NAN, f32::NAN, f32::NAN,
47596 1., 1., 1., 1.,
47597 1., 1., 1., 1.,
47598 );
47599 let b = _mm512_set1_ps(f32::MAX);
47600 let c = _mm512_set1_epi32(i32::MAX);
47601 let r = _mm512_maskz_fixupimm_round_ps::<5, _MM_FROUND_CUR_DIRECTION>(
47602 0b11111111_00000000,
47603 a,
47604 b,
47605 c,
47606 );
47607 let e = _mm512_set_ps(
47608 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
47609 );
47610 assert_eq_m512(r, e);
47611 }
47612
47613 #[simd_test(enable = "avx512f")]
47614 unsafe fn test_mm512_getmant_round_ps() {
47615 let a = _mm512_set1_ps(10.);
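// 10.0 = 1.25 * 2^3; normalizing the mantissa into [1, 2) yields 1.25.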
47616 let r = _mm512_getmant_round_ps::<
47617 _MM_MANT_NORM_1_2,
47618 _MM_MANT_SIGN_SRC,
47619 _MM_FROUND_CUR_DIRECTION,
47620 >(a);
47621 let e = _mm512_set1_ps(1.25);
47622 assert_eq_m512(r, e);
47623 }
47624
47625 #[simd_test(enable = "avx512f")]
47626 unsafe fn test_mm512_mask_getmant_round_ps() {
47627 let a = _mm512_set1_ps(10.);
47628 let r = _mm512_mask_getmant_round_ps::<
47629 _MM_MANT_NORM_1_2,
47630 _MM_MANT_SIGN_SRC,
47631 _MM_FROUND_CUR_DIRECTION,
47632 >(a, 0, a);
47633 assert_eq_m512(r, a);
47634 let r = _mm512_mask_getmant_round_ps::<
47635 _MM_MANT_NORM_1_2,
47636 _MM_MANT_SIGN_SRC,
47637 _MM_FROUND_CUR_DIRECTION,
47638 >(a, 0b11111111_00000000, a);
47639 let e = _mm512_setr_ps(
47640 10., 10., 10., 10., 10., 10., 10., 10., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
47641 );
47642 assert_eq_m512(r, e);
47643 }
47644
47645 #[simd_test(enable = "avx512f")]
47646 unsafe fn test_mm512_maskz_getmant_round_ps() {
47647 let a = _mm512_set1_ps(10.);
47648 let r = _mm512_maskz_getmant_round_ps::<
47649 _MM_MANT_NORM_1_2,
47650 _MM_MANT_SIGN_SRC,
47651 _MM_FROUND_CUR_DIRECTION,
47652 >(0, a);
47653 assert_eq_m512(r, _mm512_setzero_ps());
47654 let r = _mm512_maskz_getmant_round_ps::<
47655 _MM_MANT_NORM_1_2,
47656 _MM_MANT_SIGN_SRC,
47657 _MM_FROUND_CUR_DIRECTION,
47658 >(0b11111111_00000000, a);
47659 let e = _mm512_setr_ps(
47660 0., 0., 0., 0., 0., 0., 0., 0., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
47661 );
47662 assert_eq_m512(r, e);
47663 }
47664
47665 #[simd_test(enable = "avx512f")]
47666 unsafe fn test_mm512_cvtps_epi32() {
47667 let a = _mm512_setr_ps(
47668 0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47669 );
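// The default conversion uses round-to-nearest-even: -1.4 -> -1, -3.5 -> -4, 9.5 -> 10, 15.5 -> 16.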
47670 let r = _mm512_cvtps_epi32(a);
47671 let e = _mm512_setr_epi32(0, -1, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
47672 assert_eq_m512i(r, e);
47673 }
47674
47675 #[simd_test(enable = "avx512f")]
47676 unsafe fn test_mm512_mask_cvtps_epi32() {
47677 let a = _mm512_setr_ps(
47678 0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47679 );
47680 let src = _mm512_set1_epi32(0);
47681 let r = _mm512_mask_cvtps_epi32(src, 0, a);
47682 assert_eq_m512i(r, src);
47683 let r = _mm512_mask_cvtps_epi32(src, 0b00000000_11111111, a);
47684 let e = _mm512_setr_epi32(0, -1, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
47685 assert_eq_m512i(r, e);
47686 }
47687
47688 #[simd_test(enable = "avx512f")]
47689 unsafe fn test_mm512_maskz_cvtps_epi32() {
47690 let a = _mm512_setr_ps(
47691 0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47692 );
47693 let r = _mm512_maskz_cvtps_epi32(0, a);
47694 assert_eq_m512i(r, _mm512_setzero_si512());
47695 let r = _mm512_maskz_cvtps_epi32(0b00000000_11111111, a);
47696 let e = _mm512_setr_epi32(0, -1, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
47697 assert_eq_m512i(r, e);
47698 }
47699
47700 #[simd_test(enable = "avx512f,avx512vl")]
47701 unsafe fn test_mm256_mask_cvtps_epi32() {
47702 let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
47703 let src = _mm256_set1_epi32(0);
47704 let r = _mm256_mask_cvtps_epi32(src, 0, a);
47705 assert_eq_m256i(r, src);
47706 let r = _mm256_mask_cvtps_epi32(src, 0b11111111, a);
47707 let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
47708 assert_eq_m256i(r, e);
47709 }
47710
47711 #[simd_test(enable = "avx512f,avx512vl")]
47712 unsafe fn test_mm256_maskz_cvtps_epi32() {
47713 let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
47714 let r = _mm256_maskz_cvtps_epi32(0, a);
47715 assert_eq_m256i(r, _mm256_setzero_si256());
47716 let r = _mm256_maskz_cvtps_epi32(0b11111111, a);
47717 let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
47718 assert_eq_m256i(r, e);
47719 }
47720
47721 #[simd_test(enable = "avx512f,avx512vl")]
47722 unsafe fn test_mm_mask_cvtps_epi32() {
47723 let a = _mm_set_ps(12., 13.5, 14., 15.5);
47724 let src = _mm_set1_epi32(0);
47725 let r = _mm_mask_cvtps_epi32(src, 0, a);
47726 assert_eq_m128i(r, src);
47727 let r = _mm_mask_cvtps_epi32(src, 0b00001111, a);
47728 let e = _mm_set_epi32(12, 14, 14, 16);
47729 assert_eq_m128i(r, e);
47730 }
47731
47732 #[simd_test(enable = "avx512f,avx512vl")]
47733 unsafe fn test_mm_maskz_cvtps_epi32() {
47734 let a = _mm_set_ps(12., 13.5, 14., 15.5);
47735 let r = _mm_maskz_cvtps_epi32(0, a);
47736 assert_eq_m128i(r, _mm_setzero_si128());
47737 let r = _mm_maskz_cvtps_epi32(0b00001111, a);
47738 let e = _mm_set_epi32(12, 14, 14, 16);
47739 assert_eq_m128i(r, e);
47740 }
47741
47742 #[simd_test(enable = "avx512f")]
47743 unsafe fn test_mm512_cvtps_epu32() {
47744 let a = _mm512_setr_ps(
47745 0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47746 );
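// Negative inputs are out of range for the unsigned conversion and yield 0xFFFFFFFF (shown as -1 through the epi32 helpers); in-range lanes round to nearest even.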
47747 let r = _mm512_cvtps_epu32(a);
47748 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 10, 10, 12, 12, 14, 14, 16);
47749 assert_eq_m512i(r, e);
47750 }
47751
47752 #[simd_test(enable = "avx512f")]
47753 unsafe fn test_mm512_mask_cvtps_epu32() {
47754 let a = _mm512_setr_ps(
47755 0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47756 );
47757 let src = _mm512_set1_epi32(0);
47758 let r = _mm512_mask_cvtps_epu32(src, 0, a);
47759 assert_eq_m512i(r, src);
47760 let r = _mm512_mask_cvtps_epu32(src, 0b00000000_11111111, a);
47761 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
47762 assert_eq_m512i(r, e);
47763 }
47764
47765 #[simd_test(enable = "avx512f")]
47766 unsafe fn test_mm512_maskz_cvtps_epu32() {
47767 let a = _mm512_setr_ps(
47768 0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47769 );
47770 let r = _mm512_maskz_cvtps_epu32(0, a);
47771 assert_eq_m512i(r, _mm512_setzero_si512());
47772 let r = _mm512_maskz_cvtps_epu32(0b00000000_11111111, a);
47773 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
47774 assert_eq_m512i(r, e);
47775 }
47776
47777 #[simd_test(enable = "avx512f,avx512vl")]
47778 unsafe fn test_mm256_cvtps_epu32() {
47779 let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
47780 let r = _mm256_cvtps_epu32(a);
47781 let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
47782 assert_eq_m256i(r, e);
47783 }
47784
47785 #[simd_test(enable = "avx512f,avx512vl")]
47786 unsafe fn test_mm256_mask_cvtps_epu32() {
47787 let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
47788 let src = _mm256_set1_epi32(0);
47789 let r = _mm256_mask_cvtps_epu32(src, 0, a);
47790 assert_eq_m256i(r, src);
47791 let r = _mm256_mask_cvtps_epu32(src, 0b11111111, a);
47792 let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
47793 assert_eq_m256i(r, e);
47794 }
47795
47796 #[simd_test(enable = "avx512f,avx512vl")]
47797 unsafe fn test_mm256_maskz_cvtps_epu32() {
47798 let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
47799 let r = _mm256_maskz_cvtps_epu32(0, a);
47800 assert_eq_m256i(r, _mm256_setzero_si256());
47801 let r = _mm256_maskz_cvtps_epu32(0b11111111, a);
47802 let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
47803 assert_eq_m256i(r, e);
47804 }
47805
47806 #[simd_test(enable = "avx512f,avx512vl")]
47807 unsafe fn test_mm_cvtps_epu32() {
47808 let a = _mm_set_ps(12., 13.5, 14., 15.5);
47809 let r = _mm_cvtps_epu32(a);
47810 let e = _mm_set_epi32(12, 14, 14, 16);
47811 assert_eq_m128i(r, e);
47812 }
47813
47814 #[simd_test(enable = "avx512f,avx512vl")]
47815 unsafe fn test_mm_mask_cvtps_epu32() {
47816 let a = _mm_set_ps(12., 13.5, 14., 15.5);
47817 let src = _mm_set1_epi32(0);
47818 let r = _mm_mask_cvtps_epu32(src, 0, a);
47819 assert_eq_m128i(r, src);
47820 let r = _mm_mask_cvtps_epu32(src, 0b00001111, a);
47821 let e = _mm_set_epi32(12, 14, 14, 16);
47822 assert_eq_m128i(r, e);
47823 }
47824
47825 #[simd_test(enable = "avx512f,avx512vl")]
47826 unsafe fn test_mm_maskz_cvtps_epu32() {
47827 let a = _mm_set_ps(12., 13.5, 14., 15.5);
47828 let r = _mm_maskz_cvtps_epu32(0, a);
47829 assert_eq_m128i(r, _mm_setzero_si128());
47830 let r = _mm_maskz_cvtps_epu32(0b00001111, a);
47831 let e = _mm_set_epi32(12, 14, 14, 16);
47832 assert_eq_m128i(r, e);
47833 }
47834
47835 #[simd_test(enable = "avx512f")]
47836 unsafe fn test_mm512_cvtepi8_epi32() {
47837 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47838 let r = _mm512_cvtepi8_epi32(a);
47839 let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47840 assert_eq_m512i(r, e);
47841 }
47842
47843 #[simd_test(enable = "avx512f")]
47844 unsafe fn test_mm512_mask_cvtepi8_epi32() {
47845 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47846 let src = _mm512_set1_epi32(-1);
47847 let r = _mm512_mask_cvtepi8_epi32(src, 0, a);
47848 assert_eq_m512i(r, src);
47849 let r = _mm512_mask_cvtepi8_epi32(src, 0b00000000_11111111, a);
47850 let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
47851 assert_eq_m512i(r, e);
47852 }
47853
47854 #[simd_test(enable = "avx512f")]
47855 unsafe fn test_mm512_maskz_cvtepi8_epi32() {
47856 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47857 let r = _mm512_maskz_cvtepi8_epi32(0, a);
47858 assert_eq_m512i(r, _mm512_setzero_si512());
47859 let r = _mm512_maskz_cvtepi8_epi32(0b00000000_11111111, a);
47860 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
47861 assert_eq_m512i(r, e);
47862 }
47863
47864 #[simd_test(enable = "avx512f,avx512vl")]
47865 unsafe fn test_mm256_mask_cvtepi8_epi32() {
47866 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47867 let src = _mm256_set1_epi32(-1);
47868 let r = _mm256_mask_cvtepi8_epi32(src, 0, a);
47869 assert_eq_m256i(r, src);
47870 let r = _mm256_mask_cvtepi8_epi32(src, 0b11111111, a);
47871 let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
47872 assert_eq_m256i(r, e);
47873 }
47874
47875 #[simd_test(enable = "avx512f,avx512vl")]
47876 unsafe fn test_mm256_maskz_cvtepi8_epi32() {
47877 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47878 let r = _mm256_maskz_cvtepi8_epi32(0, a);
47879 assert_eq_m256i(r, _mm256_setzero_si256());
47880 let r = _mm256_maskz_cvtepi8_epi32(0b11111111, a);
47881 let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
47882 assert_eq_m256i(r, e);
47883 }
47884
47885 #[simd_test(enable = "avx512f,avx512vl")]
47886 unsafe fn test_mm_mask_cvtepi8_epi32() {
47887 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47888 let src = _mm_set1_epi32(-1);
47889 let r = _mm_mask_cvtepi8_epi32(src, 0, a);
47890 assert_eq_m128i(r, src);
47891 let r = _mm_mask_cvtepi8_epi32(src, 0b00001111, a);
47892 let e = _mm_set_epi32(12, 13, 14, 15);
47893 assert_eq_m128i(r, e);
47894 }
47895
47896 #[simd_test(enable = "avx512f,avx512vl")]
47897 unsafe fn test_mm_maskz_cvtepi8_epi32() {
47898 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47899 let r = _mm_maskz_cvtepi8_epi32(0, a);
47900 assert_eq_m128i(r, _mm_setzero_si128());
47901 let r = _mm_maskz_cvtepi8_epi32(0b00001111, a);
47902 let e = _mm_set_epi32(12, 13, 14, 15);
47903 assert_eq_m128i(r, e);
47904 }
47905
47906 #[simd_test(enable = "avx512f")]
47907 unsafe fn test_mm512_cvtepu8_epi32() {
47908 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47909 let r = _mm512_cvtepu8_epi32(a);
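        // All inputs fit in 0..=15, so zero-extension produces the same lanes as the
        // sign-extension test above.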
47910 let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47911 assert_eq_m512i(r, e);
47912 }
47913
47914 #[simd_test(enable = "avx512f")]
47915 unsafe fn test_mm512_mask_cvtepu8_epi32() {
47916 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47917 let src = _mm512_set1_epi32(-1);
47918 let r = _mm512_mask_cvtepu8_epi32(src, 0, a);
47919 assert_eq_m512i(r, src);
47920 let r = _mm512_mask_cvtepu8_epi32(src, 0b00000000_11111111, a);
47921 let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
47922 assert_eq_m512i(r, e);
47923 }
47924
47925 #[simd_test(enable = "avx512f")]
47926 unsafe fn test_mm512_maskz_cvtepu8_epi32() {
47927 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47928 let r = _mm512_maskz_cvtepu8_epi32(0, a);
47929 assert_eq_m512i(r, _mm512_setzero_si512());
47930 let r = _mm512_maskz_cvtepu8_epi32(0b00000000_11111111, a);
47931 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
47932 assert_eq_m512i(r, e);
47933 }
47934
47935 #[simd_test(enable = "avx512f,avx512vl")]
47936 unsafe fn test_mm256_mask_cvtepu8_epi32() {
47937 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47938 let src = _mm256_set1_epi32(-1);
47939 let r = _mm256_mask_cvtepu8_epi32(src, 0, a);
47940 assert_eq_m256i(r, src);
47941 let r = _mm256_mask_cvtepu8_epi32(src, 0b11111111, a);
47942 let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
47943 assert_eq_m256i(r, e);
47944 }
47945
47946 #[simd_test(enable = "avx512f,avx512vl")]
47947 unsafe fn test_mm256_maskz_cvtepu8_epi32() {
47948 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47949 let r = _mm256_maskz_cvtepu8_epi32(0, a);
47950 assert_eq_m256i(r, _mm256_setzero_si256());
47951 let r = _mm256_maskz_cvtepu8_epi32(0b11111111, a);
47952 let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
47953 assert_eq_m256i(r, e);
47954 }
47955
47956 #[simd_test(enable = "avx512f,avx512vl")]
47957 unsafe fn test_mm_mask_cvtepu8_epi32() {
47958 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47959 let src = _mm_set1_epi32(-1);
47960 let r = _mm_mask_cvtepu8_epi32(src, 0, a);
47961 assert_eq_m128i(r, src);
47962 let r = _mm_mask_cvtepu8_epi32(src, 0b00001111, a);
47963 let e = _mm_set_epi32(12, 13, 14, 15);
47964 assert_eq_m128i(r, e);
47965 }
47966
47967 #[simd_test(enable = "avx512f,avx512vl")]
47968 unsafe fn test_mm_maskz_cvtepu8_epi32() {
47969 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47970 let r = _mm_maskz_cvtepu8_epi32(0, a);
47971 assert_eq_m128i(r, _mm_setzero_si128());
47972 let r = _mm_maskz_cvtepu8_epi32(0b00001111, a);
47973 let e = _mm_set_epi32(12, 13, 14, 15);
47974 assert_eq_m128i(r, e);
47975 }
47976
47977 #[simd_test(enable = "avx512f")]
47978 unsafe fn test_mm512_cvtepi16_epi32() {
47979 let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47980 let r = _mm512_cvtepi16_epi32(a);
47981 let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47982 assert_eq_m512i(r, e);
47983 }
47984
47985 #[simd_test(enable = "avx512f")]
47986 unsafe fn test_mm512_mask_cvtepi16_epi32() {
47987 let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47988 let src = _mm512_set1_epi32(-1);
47989 let r = _mm512_mask_cvtepi16_epi32(src, 0, a);
47990 assert_eq_m512i(r, src);
47991 let r = _mm512_mask_cvtepi16_epi32(src, 0b00000000_11111111, a);
47992 let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
47993 assert_eq_m512i(r, e);
47994 }
47995
47996 #[simd_test(enable = "avx512f")]
47997 unsafe fn test_mm512_maskz_cvtepi16_epi32() {
47998 let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47999 let r = _mm512_maskz_cvtepi16_epi32(0, a);
48000 assert_eq_m512i(r, _mm512_setzero_si512());
48001 let r = _mm512_maskz_cvtepi16_epi32(0b00000000_11111111, a);
48002 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
48003 assert_eq_m512i(r, e);
48004 }
48005
48006 #[simd_test(enable = "avx512f,avx512vl")]
48007 unsafe fn test_mm256_mask_cvtepi16_epi32() {
48008 let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48009 let src = _mm256_set1_epi32(-1);
48010 let r = _mm256_mask_cvtepi16_epi32(src, 0, a);
48011 assert_eq_m256i(r, src);
48012 let r = _mm256_mask_cvtepi16_epi32(src, 0b11111111, a);
48013 let e = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48014 assert_eq_m256i(r, e);
48015 }
48016
48017 #[simd_test(enable = "avx512f,avx512vl")]
48018 unsafe fn test_mm256_maskz_cvtepi16_epi32() {
48019 let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48020 let r = _mm256_maskz_cvtepi16_epi32(0, a);
48021 assert_eq_m256i(r, _mm256_setzero_si256());
48022 let r = _mm256_maskz_cvtepi16_epi32(0b11111111, a);
48023 let e = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48024 assert_eq_m256i(r, e);
48025 }
48026
48027 #[simd_test(enable = "avx512f,avx512vl")]
48028 unsafe fn test_mm_mask_cvtepi16_epi32() {
48029 let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48030 let src = _mm_set1_epi32(-1);
48031 let r = _mm_mask_cvtepi16_epi32(src, 0, a);
48032 assert_eq_m128i(r, src);
48033 let r = _mm_mask_cvtepi16_epi32(src, 0b00001111, a);
48034 let e = _mm_set_epi32(4, 5, 6, 7);
48035 assert_eq_m128i(r, e);
48036 }
48037
48038 #[simd_test(enable = "avx512f,avx512vl")]
48039 unsafe fn test_mm_maskz_cvtepi16_epi32() {
48040 let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48041 let r = _mm_maskz_cvtepi16_epi32(0, a);
48042 assert_eq_m128i(r, _mm_setzero_si128());
48043 let r = _mm_maskz_cvtepi16_epi32(0b00001111, a);
48044 let e = _mm_set_epi32(4, 5, 6, 7);
48045 assert_eq_m128i(r, e);
48046 }
48047
48048 #[simd_test(enable = "avx512f")]
48049 unsafe fn test_mm512_cvtepu16_epi32() {
48050 let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48051 let r = _mm512_cvtepu16_epi32(a);
48052 let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48053 assert_eq_m512i(r, e);
48054 }
48055
48056 #[simd_test(enable = "avx512f")]
48057 unsafe fn test_mm512_mask_cvtepu16_epi32() {
48058 let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48059 let src = _mm512_set1_epi32(-1);
48060 let r = _mm512_mask_cvtepu16_epi32(src, 0, a);
48061 assert_eq_m512i(r, src);
48062 let r = _mm512_mask_cvtepu16_epi32(src, 0b00000000_11111111, a);
48063 let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
48064 assert_eq_m512i(r, e);
48065 }
48066
48067 #[simd_test(enable = "avx512f")]
48068 unsafe fn test_mm512_maskz_cvtepu16_epi32() {
48069 let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48070 let r = _mm512_maskz_cvtepu16_epi32(0, a);
48071 assert_eq_m512i(r, _mm512_setzero_si512());
48072 let r = _mm512_maskz_cvtepu16_epi32(0b00000000_11111111, a);
48073 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
48074 assert_eq_m512i(r, e);
48075 }
48076
48077 #[simd_test(enable = "avx512f,avx512vl")]
48078 unsafe fn test_mm256_mask_cvtepu16_epi32() {
48079 let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
48080 let src = _mm256_set1_epi32(-1);
48081 let r = _mm256_mask_cvtepu16_epi32(src, 0, a);
48082 assert_eq_m256i(r, src);
48083 let r = _mm256_mask_cvtepu16_epi32(src, 0b11111111, a);
48084 let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
48085 assert_eq_m256i(r, e);
48086 }
48087
48088 #[simd_test(enable = "avx512f,avx512vl")]
48089 unsafe fn test_mm256_maskz_cvtepu16_epi32() {
48090 let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
48091 let r = _mm256_maskz_cvtepu16_epi32(0, a);
48092 assert_eq_m256i(r, _mm256_setzero_si256());
48093 let r = _mm256_maskz_cvtepu16_epi32(0b11111111, a);
48094 let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
48095 assert_eq_m256i(r, e);
48096 }
48097
48098 #[simd_test(enable = "avx512f,avx512vl")]
48099 unsafe fn test_mm_mask_cvtepu16_epi32() {
48100 let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
48101 let src = _mm_set1_epi32(-1);
48102 let r = _mm_mask_cvtepu16_epi32(src, 0, a);
48103 assert_eq_m128i(r, src);
48104 let r = _mm_mask_cvtepu16_epi32(src, 0b00001111, a);
48105 let e = _mm_set_epi32(12, 13, 14, 15);
48106 assert_eq_m128i(r, e);
48107 }
48108
48109 #[simd_test(enable = "avx512f,avx512vl")]
48110 unsafe fn test_mm_maskz_cvtepu16_epi32() {
48111 let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
48112 let r = _mm_maskz_cvtepu16_epi32(0, a);
48113 assert_eq_m128i(r, _mm_setzero_si128());
48114 let r = _mm_maskz_cvtepu16_epi32(0b00001111, a);
48115 let e = _mm_set_epi32(12, 13, 14, 15);
48116 assert_eq_m128i(r, e);
48117 }
48118
48119 #[simd_test(enable = "avx512f")]
48120 unsafe fn test_mm512_cvtepi32_ps() {
48121 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48122 let r = _mm512_cvtepi32_ps(a);
48123 let e = _mm512_set_ps(
48124 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
48125 );
48126 assert_eq_m512(r, e);
48127 }
48128
48129 #[simd_test(enable = "avx512f")]
48130 unsafe fn test_mm512_mask_cvtepi32_ps() {
48131 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48132 let src = _mm512_set1_ps(-1.);
48133 let r = _mm512_mask_cvtepi32_ps(src, 0, a);
48134 assert_eq_m512(r, src);
48135 let r = _mm512_mask_cvtepi32_ps(src, 0b00000000_11111111, a);
48136 let e = _mm512_set_ps(
48137 -1., -1., -1., -1., -1., -1., -1., -1., 8., 9., 10., 11., 12., 13., 14., 15.,
48138 );
48139 assert_eq_m512(r, e);
48140 }
48141
48142 #[simd_test(enable = "avx512f")]
48143 unsafe fn test_mm512_maskz_cvtepi32_ps() {
48144 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48145 let r = _mm512_maskz_cvtepi32_ps(0, a);
48146 assert_eq_m512(r, _mm512_setzero_ps());
48147 let r = _mm512_maskz_cvtepi32_ps(0b00000000_11111111, a);
48148 let e = _mm512_set_ps(
48149 0., 0., 0., 0., 0., 0., 0., 0., 8., 9., 10., 11., 12., 13., 14., 15.,
48150 );
48151 assert_eq_m512(r, e);
48152 }
48153
48154 #[simd_test(enable = "avx512f,avx512vl")]
48155 unsafe fn test_mm256_mask_cvtepi32_ps() {
48156 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
48157 let src = _mm256_set1_ps(-1.);
48158 let r = _mm256_mask_cvtepi32_ps(src, 0, a);
48159 assert_eq_m256(r, src);
48160 let r = _mm256_mask_cvtepi32_ps(src, 0b11111111, a);
48161 let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
48162 assert_eq_m256(r, e);
48163 }
48164
48165 #[simd_test(enable = "avx512f,avx512vl")]
48166 unsafe fn test_mm256_maskz_cvtepi32_ps() {
48167 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
48168 let r = _mm256_maskz_cvtepi32_ps(0, a);
48169 assert_eq_m256(r, _mm256_setzero_ps());
48170 let r = _mm256_maskz_cvtepi32_ps(0b11111111, a);
48171 let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
48172 assert_eq_m256(r, e);
48173 }
48174
48175 #[simd_test(enable = "avx512f,avx512vl")]
48176 unsafe fn test_mm_mask_cvtepi32_ps() {
48177 let a = _mm_set_epi32(1, 2, 3, 4);
48178 let src = _mm_set1_ps(-1.);
48179 let r = _mm_mask_cvtepi32_ps(src, 0, a);
48180 assert_eq_m128(r, src);
48181 let r = _mm_mask_cvtepi32_ps(src, 0b00001111, a);
48182 let e = _mm_set_ps(1., 2., 3., 4.);
48183 assert_eq_m128(r, e);
48184 }
48185
48186 #[simd_test(enable = "avx512f,avx512vl")]
48187 unsafe fn test_mm_maskz_cvtepi32_ps() {
48188 let a = _mm_set_epi32(1, 2, 3, 4);
48189 let r = _mm_maskz_cvtepi32_ps(0, a);
48190 assert_eq_m128(r, _mm_setzero_ps());
48191 let r = _mm_maskz_cvtepi32_ps(0b00001111, a);
48192 let e = _mm_set_ps(1., 2., 3., 4.);
48193 assert_eq_m128(r, e);
48194 }
48195
48196 #[simd_test(enable = "avx512f")]
48197 unsafe fn test_mm512_cvtepu32_ps() {
48198 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48199 let r = _mm512_cvtepu32_ps(a);
48200 let e = _mm512_set_ps(
48201 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
48202 );
48203 assert_eq_m512(r, e);
48204 }
48205
48206 #[simd_test(enable = "avx512f")]
48207 unsafe fn test_mm512_mask_cvtepu32_ps() {
48208 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48209 let src = _mm512_set1_ps(-1.);
48210 let r = _mm512_mask_cvtepu32_ps(src, 0, a);
48211 assert_eq_m512(r, src);
48212 let r = _mm512_mask_cvtepu32_ps(src, 0b00000000_11111111, a);
48213 let e = _mm512_set_ps(
48214 -1., -1., -1., -1., -1., -1., -1., -1., 8., 9., 10., 11., 12., 13., 14., 15.,
48215 );
48216 assert_eq_m512(r, e);
48217 }
48218
48219 #[simd_test(enable = "avx512f")]
48220 unsafe fn test_mm512_maskz_cvtepu32_ps() {
48221 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48222 let r = _mm512_maskz_cvtepu32_ps(0, a);
48223 assert_eq_m512(r, _mm512_setzero_ps());
48224 let r = _mm512_maskz_cvtepu32_ps(0b00000000_11111111, a);
48225 let e = _mm512_set_ps(
48226 0., 0., 0., 0., 0., 0., 0., 0., 8., 9., 10., 11., 12., 13., 14., 15.,
48227 );
48228 assert_eq_m512(r, e);
48229 }
48230
48231 #[simd_test(enable = "avx512f")]
48232 unsafe fn test_mm512_cvtepi32_epi16() {
48233 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
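        // Plain truncation (vpmovdw): each 32-bit lane is narrowed to its low 16 bits;
        // the saturating variants are exercised in the cvtsepi32/cvtusepi32 tests below.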
48234 let r = _mm512_cvtepi32_epi16(a);
48235 let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48236 assert_eq_m256i(r, e);
48237 }
48238
48239 #[simd_test(enable = "avx512f")]
48240 unsafe fn test_mm512_mask_cvtepi32_epi16() {
48241 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48242 let src = _mm256_set1_epi16(-1);
48243 let r = _mm512_mask_cvtepi32_epi16(src, 0, a);
48244 assert_eq_m256i(r, src);
48245 let r = _mm512_mask_cvtepi32_epi16(src, 0b00000000_11111111, a);
48246 let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
48247 assert_eq_m256i(r, e);
48248 }
48249
48250 #[simd_test(enable = "avx512f")]
48251 unsafe fn test_mm512_maskz_cvtepi32_epi16() {
48252 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48253 let r = _mm512_maskz_cvtepi32_epi16(0, a);
48254 assert_eq_m256i(r, _mm256_setzero_si256());
48255 let r = _mm512_maskz_cvtepi32_epi16(0b00000000_11111111, a);
48256 let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
48257 assert_eq_m256i(r, e);
48258 }
48259
48260 #[simd_test(enable = "avx512f,avx512vl")]
48261 unsafe fn test_mm256_cvtepi32_epi16() {
48262 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48263 let r = _mm256_cvtepi32_epi16(a);
48264 let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48265 assert_eq_m128i(r, e);
48266 }
48267
48268 #[simd_test(enable = "avx512f,avx512vl")]
48269 unsafe fn test_mm256_mask_cvtepi32_epi16() {
48270 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48271 let src = _mm_set1_epi16(-1);
48272 let r = _mm256_mask_cvtepi32_epi16(src, 0, a);
48273 assert_eq_m128i(r, src);
48274 let r = _mm256_mask_cvtepi32_epi16(src, 0b11111111, a);
48275 let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48276 assert_eq_m128i(r, e);
48277 }
48278
48279 #[simd_test(enable = "avx512f,avx512vl")]
48280 unsafe fn test_mm256_maskz_cvtepi32_epi16() {
48281 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48282 let r = _mm256_maskz_cvtepi32_epi16(0, a);
48283 assert_eq_m128i(r, _mm_setzero_si128());
48284 let r = _mm256_maskz_cvtepi32_epi16(0b11111111, a);
48285 let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48286 assert_eq_m128i(r, e);
48287 }
48288
48289 #[simd_test(enable = "avx512f,avx512vl")]
48290 unsafe fn test_mm_cvtepi32_epi16() {
48291 let a = _mm_set_epi32(4, 5, 6, 7);
48292 let r = _mm_cvtepi32_epi16(a);
48293 let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
48294 assert_eq_m128i(r, e);
48295 }
48296
48297 #[simd_test(enable = "avx512f,avx512vl")]
48298 unsafe fn test_mm_mask_cvtepi32_epi16() {
48299 let a = _mm_set_epi32(4, 5, 6, 7);
48300 let src = _mm_set1_epi16(0);
48301 let r = _mm_mask_cvtepi32_epi16(src, 0, a);
48302 assert_eq_m128i(r, src);
48303 let r = _mm_mask_cvtepi32_epi16(src, 0b00001111, a);
48304 let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
48305 assert_eq_m128i(r, e);
48306 }
48307
48308 #[simd_test(enable = "avx512f,avx512vl")]
48309 unsafe fn test_mm_maskz_cvtepi32_epi16() {
48310 let a = _mm_set_epi32(4, 5, 6, 7);
48311 let r = _mm_maskz_cvtepi32_epi16(0, a);
48312 assert_eq_m128i(r, _mm_setzero_si128());
48313 let r = _mm_maskz_cvtepi32_epi16(0b00001111, a);
48314 let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
48315 assert_eq_m128i(r, e);
48316 }
48317
48318 #[simd_test(enable = "avx512f")]
48319 unsafe fn test_mm512_cvtepi32_epi8() {
48320 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48321 let r = _mm512_cvtepi32_epi8(a);
48322 let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48323 assert_eq_m128i(r, e);
48324 }
48325
48326 #[simd_test(enable = "avx512f")]
48327 unsafe fn test_mm512_mask_cvtepi32_epi8() {
48328 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48329 let src = _mm_set1_epi8(-1);
48330 let r = _mm512_mask_cvtepi32_epi8(src, 0, a);
48331 assert_eq_m128i(r, src);
48332 let r = _mm512_mask_cvtepi32_epi8(src, 0b00000000_11111111, a);
48333 let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
48334 assert_eq_m128i(r, e);
48335 }
48336
48337 #[simd_test(enable = "avx512f")]
48338 unsafe fn test_mm512_maskz_cvtepi32_epi8() {
48339 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48340 let r = _mm512_maskz_cvtepi32_epi8(0, a);
48341 assert_eq_m128i(r, _mm_setzero_si128());
48342 let r = _mm512_maskz_cvtepi32_epi8(0b00000000_11111111, a);
48343 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
48344 assert_eq_m128i(r, e);
48345 }
48346
48347 #[simd_test(enable = "avx512f,avx512vl")]
48348 unsafe fn test_mm256_cvtepi32_epi8() {
48349 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48350 let r = _mm256_cvtepi32_epi8(a);
48351 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7);
48352 assert_eq_m128i(r, e);
48353 }
48354
48355 #[simd_test(enable = "avx512f,avx512vl")]
48356 unsafe fn test_mm256_mask_cvtepi32_epi8() {
48357 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48358 let src = _mm_set1_epi8(0);
48359 let r = _mm256_mask_cvtepi32_epi8(src, 0, a);
48360 assert_eq_m128i(r, src);
48361 let r = _mm256_mask_cvtepi32_epi8(src, 0b11111111, a);
48362 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7);
48363 assert_eq_m128i(r, e);
48364 }
48365
48366 #[simd_test(enable = "avx512f,avx512vl")]
48367 unsafe fn test_mm256_maskz_cvtepi32_epi8() {
48368 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48369 let r = _mm256_maskz_cvtepi32_epi8(0, a);
48370 assert_eq_m128i(r, _mm_setzero_si128());
48371 let r = _mm256_maskz_cvtepi32_epi8(0b11111111, a);
48372 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7);
48373 assert_eq_m128i(r, e);
48374 }
48375
48376 #[simd_test(enable = "avx512f,avx512vl")]
48377 unsafe fn test_mm_cvtepi32_epi8() {
48378 let a = _mm_set_epi32(4, 5, 6, 7);
48379 let r = _mm_cvtepi32_epi8(a);
48380 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, 7);
48381 assert_eq_m128i(r, e);
48382 }
48383
48384 #[simd_test(enable = "avx512f,avx512vl")]
48385 unsafe fn test_mm_mask_cvtepi32_epi8() {
48386 let a = _mm_set_epi32(4, 5, 6, 7);
48387 let src = _mm_set1_epi8(0);
48388 let r = _mm_mask_cvtepi32_epi8(src, 0, a);
48389 assert_eq_m128i(r, src);
48390 let r = _mm_mask_cvtepi32_epi8(src, 0b00001111, a);
48391 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, 7);
48392 assert_eq_m128i(r, e);
48393 }
48394
48395 #[simd_test(enable = "avx512f,avx512vl")]
48396 unsafe fn test_mm_maskz_cvtepi32_epi8() {
48397 let a = _mm_set_epi32(4, 5, 6, 7);
48398 let r = _mm_maskz_cvtepi32_epi8(0, a);
48399 assert_eq_m128i(r, _mm_setzero_si128());
48400 let r = _mm_maskz_cvtepi32_epi8(0b00001111, a);
48401 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, 7);
48402 assert_eq_m128i(r, e);
48403 }
48404
48405 #[simd_test(enable = "avx512f")]
48406 unsafe fn test_mm512_cvtsepi32_epi16() {
48407 #[rustfmt::skip]
48408 let a = _mm512_set_epi32(
48409 0, 1, 2, 3,
48410 4, 5, 6, 7,
48411 8, 9, 10, 11,
48412 12, 13, i32::MIN, i32::MAX,
48413 );
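        // Signed saturation (vpmovsdw): i32::MIN and i32::MAX clamp to i16::MIN and i16::MAX.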
48414 let r = _mm512_cvtsepi32_epi16(a);
48415 #[rustfmt::skip]
48416 let e = _mm256_set_epi16(
48417 0, 1, 2, 3,
48418 4, 5, 6, 7,
48419 8, 9, 10, 11,
48420 12, 13, i16::MIN, i16::MAX,
48421 );
48422 assert_eq_m256i(r, e);
48423 }
48424
48425 #[simd_test(enable = "avx512f")]
48426 unsafe fn test_mm512_mask_cvtsepi32_epi16() {
48427 #[rustfmt::skip]
48428 let a = _mm512_set_epi32(
48429 0, 1, 2, 3,
48430 4, 5, 6, 7,
48431 8, 9, 10, 11,
48432 12, 13, i32::MIN, i32::MAX,
48433 );
48434 let src = _mm256_set1_epi16(-1);
48435 let r = _mm512_mask_cvtsepi32_epi16(src, 0, a);
48436 assert_eq_m256i(r, src);
48437 let r = _mm512_mask_cvtsepi32_epi16(src, 0b00000000_11111111, a);
48438 #[rustfmt::skip]
48439 let e = _mm256_set_epi16(
48440 -1, -1, -1, -1,
48441 -1, -1, -1, -1,
48442 8, 9, 10, 11,
48443 12, 13, i16::MIN, i16::MAX,
48444 );
48445 assert_eq_m256i(r, e);
48446 }
48447
48448 #[simd_test(enable = "avx512f")]
48449 unsafe fn test_mm512_maskz_cvtsepi32_epi16() {
48450 #[rustfmt::skip]
48451 let a = _mm512_set_epi32(
48452 0, 1, 2, 3,
48453 4, 5, 6, 7,
48454 8, 9, 10, 11,
48455 12, 13, i32::MIN, i32::MAX,
48456 );
48457 let r = _mm512_maskz_cvtsepi32_epi16(0, a);
48458 assert_eq_m256i(r, _mm256_setzero_si256());
48459 let r = _mm512_maskz_cvtsepi32_epi16(0b00000000_11111111, a);
48460 #[rustfmt::skip]
48461 let e = _mm256_set_epi16(
48462 0, 0, 0, 0,
48463 0, 0, 0, 0,
48464 8, 9, 10, 11,
48465 12, 13, i16::MIN, i16::MAX,
48466 );
48467 assert_eq_m256i(r, e);
48468 }
48469
48470 #[simd_test(enable = "avx512f,avx512vl")]
48471 unsafe fn test_mm256_cvtsepi32_epi16() {
48472 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48473 let r = _mm256_cvtsepi32_epi16(a);
48474 let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48475 assert_eq_m128i(r, e);
48476 }
48477
48478 #[simd_test(enable = "avx512f,avx512vl")]
48479 unsafe fn test_mm256_mask_cvtsepi32_epi16() {
48480 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48481 let src = _mm_set1_epi16(-1);
48482 let r = _mm256_mask_cvtsepi32_epi16(src, 0, a);
48483 assert_eq_m128i(r, src);
48484 let r = _mm256_mask_cvtsepi32_epi16(src, 0b11111111, a);
48485 let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48486 assert_eq_m128i(r, e);
48487 }
48488
48489 #[simd_test(enable = "avx512f,avx512vl")]
48490 unsafe fn test_mm256_maskz_cvtsepi32_epi16() {
48491 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48492 let r = _mm256_maskz_cvtsepi32_epi16(0, a);
48493 assert_eq_m128i(r, _mm_setzero_si128());
48494 let r = _mm256_maskz_cvtsepi32_epi16(0b11111111, a);
48495 let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48496 assert_eq_m128i(r, e);
48497 }
48498
48499 #[simd_test(enable = "avx512f,avx512vl")]
48500 unsafe fn test_mm_cvtsepi32_epi16() {
48501 let a = _mm_set_epi32(4, 5, 6, 7);
48502 let r = _mm_cvtsepi32_epi16(a);
48503 let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
48504 assert_eq_m128i(r, e);
48505 }
48506
48507 #[simd_test(enable = "avx512f,avx512vl")]
48508 unsafe fn test_mm_mask_cvtsepi32_epi16() {
48509 let a = _mm_set_epi32(4, 5, 6, 7);
48510 let src = _mm_set1_epi16(0);
48511 let r = _mm_mask_cvtsepi32_epi16(src, 0, a);
48512 assert_eq_m128i(r, src);
        let r = _mm_mask_cvtsepi32_epi16(src, 0b00001111, a);
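        // Same data, but signed saturation down to 8 bits clamps the extremes to
        // i8::MIN and i8::MAX.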
48514 let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
48515 assert_eq_m128i(r, e);
48516 }
48517
48518 #[simd_test(enable = "avx512f,avx512vl")]
48519 unsafe fn test_mm_maskz_cvtsepi32_epi16() {
48520 let a = _mm_set_epi32(4, 5, 6, 7);
48521 let r = _mm_maskz_cvtsepi32_epi16(0, a);
48522 assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtsepi32_epi16(0b00001111, a);
48524 let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
48525 assert_eq_m128i(r, e);
48526 }
48527
48528 #[simd_test(enable = "avx512f")]
48529 unsafe fn test_mm512_cvtsepi32_epi8() {
48530 #[rustfmt::skip]
48531 let a = _mm512_set_epi32(
48532 0, 1, 2, 3,
48533 4, 5, 6, 7,
48534 8, 9, 10, 11,
48535 12, 13, i32::MIN, i32::MAX,
48536 );
48537 let r = _mm512_cvtsepi32_epi8(a);
48538 #[rustfmt::skip]
48539 let e = _mm_set_epi8(
48540 0, 1, 2, 3,
48541 4, 5, 6, 7,
48542 8, 9, 10, 11,
48543 12, 13, i8::MIN, i8::MAX,
48544 );
48545 assert_eq_m128i(r, e);
48546 }
48547
48548 #[simd_test(enable = "avx512f")]
48549 unsafe fn test_mm512_mask_cvtsepi32_epi8() {
48550 #[rustfmt::skip]
48551 let a = _mm512_set_epi32(
48552 0, 1, 2, 3,
48553 4, 5, 6, 7,
48554 8, 9, 10, 11,
48555 12, 13, i32::MIN, i32::MAX,
48556 );
48557 let src = _mm_set1_epi8(-1);
48558 let r = _mm512_mask_cvtsepi32_epi8(src, 0, a);
48559 assert_eq_m128i(r, src);
48560 let r = _mm512_mask_cvtsepi32_epi8(src, 0b00000000_11111111, a);
48561 #[rustfmt::skip]
48562 let e = _mm_set_epi8(
48563 -1, -1, -1, -1,
48564 -1, -1, -1, -1,
48565 8, 9, 10, 11,
48566 12, 13, i8::MIN, i8::MAX,
48567 );
48568 assert_eq_m128i(r, e);
48569 }
48570
48571 #[simd_test(enable = "avx512f")]
48572 unsafe fn test_mm512_maskz_cvtsepi32_epi8() {
48573 #[rustfmt::skip]
48574 let a = _mm512_set_epi32(
48575 0, 1, 2, 3,
48576 4, 5, 6, 7,
48577 8, 9, 10, 11,
48578 12, 13, i32::MIN, i32::MAX,
48579 );
48580 let r = _mm512_maskz_cvtsepi32_epi8(0, a);
48581 assert_eq_m128i(r, _mm_setzero_si128());
48582 let r = _mm512_maskz_cvtsepi32_epi8(0b00000000_11111111, a);
48583 #[rustfmt::skip]
48584 let e = _mm_set_epi8(
48585 0, 0, 0, 0,
48586 0, 0, 0, 0,
48587 8, 9, 10, 11,
48588 12, 13, i8::MIN, i8::MAX,
48589 );
48590 assert_eq_m128i(r, e);
48591 }
48592
48593 #[simd_test(enable = "avx512f,avx512vl")]
48594 unsafe fn test_mm256_cvtsepi32_epi8() {
48595 let a = _mm256_set_epi32(9, 10, 11, 12, 13, 14, 15, 16);
48596 let r = _mm256_cvtsepi32_epi8(a);
48597 #[rustfmt::skip]
48598 let e = _mm_set_epi8(
48599 0, 0, 0, 0,
48600 0, 0, 0, 0,
48601 9, 10, 11, 12,
48602 13, 14, 15, 16,
48603 );
48604 assert_eq_m128i(r, e);
48605 }
48606
48607 #[simd_test(enable = "avx512f,avx512vl")]
48608 unsafe fn test_mm256_mask_cvtsepi32_epi8() {
48609 let a = _mm256_set_epi32(9, 10, 11, 12, 13, 14, 15, 16);
48610 let src = _mm_set1_epi8(0);
48611 let r = _mm256_mask_cvtsepi32_epi8(src, 0, a);
48612 assert_eq_m128i(r, src);
48613 let r = _mm256_mask_cvtsepi32_epi8(src, 0b11111111, a);
48614 #[rustfmt::skip]
48615 let e = _mm_set_epi8(
48616 0, 0, 0, 0,
48617 0, 0, 0, 0,
48618 9, 10, 11, 12,
48619 13, 14, 15, 16,
48620 );
48621 assert_eq_m128i(r, e);
48622 }
48623
48624 #[simd_test(enable = "avx512f,avx512vl")]
48625 unsafe fn test_mm256_maskz_cvtsepi32_epi8() {
48626 let a = _mm256_set_epi32(9, 10, 11, 12, 13, 14, 15, 16);
48627 let r = _mm256_maskz_cvtsepi32_epi8(0, a);
48628 assert_eq_m128i(r, _mm_setzero_si128());
48629 let r = _mm256_maskz_cvtsepi32_epi8(0b11111111, a);
48630 #[rustfmt::skip]
48631 let e = _mm_set_epi8(
48632 0, 0, 0, 0,
48633 0, 0, 0, 0,
48634 9, 10, 11, 12,
48635 13, 14, 15, 16,
48636 );
48637 assert_eq_m128i(r, e);
48638 }
48639
48640 #[simd_test(enable = "avx512f,avx512vl")]
48641 unsafe fn test_mm_cvtsepi32_epi8() {
48642 let a = _mm_set_epi32(13, 14, 15, 16);
48643 let r = _mm_cvtsepi32_epi8(a);
48644 #[rustfmt::skip]
48645 let e = _mm_set_epi8(
48646 0, 0, 0, 0,
48647 0, 0, 0, 0,
48648 0, 0, 0, 0,
48649 13, 14, 15, 16,
48650 );
48651 assert_eq_m128i(r, e);
48652 }
48653
48654 #[simd_test(enable = "avx512f,avx512vl")]
48655 unsafe fn test_mm_mask_cvtsepi32_epi8() {
48656 let a = _mm_set_epi32(13, 14, 15, 16);
48657 let src = _mm_set1_epi8(0);
48658 let r = _mm_mask_cvtsepi32_epi8(src, 0, a);
48659 assert_eq_m128i(r, src);
48660 let r = _mm_mask_cvtsepi32_epi8(src, 0b00001111, a);
48661 #[rustfmt::skip]
48662 let e = _mm_set_epi8(
48663 0, 0, 0, 0,
48664 0, 0, 0, 0,
48665 0, 0, 0, 0,
48666 13, 14, 15, 16,
48667 );
48668 assert_eq_m128i(r, e);
48669 }
48670
48671 #[simd_test(enable = "avx512f,avx512vl")]
48672 unsafe fn test_mm_maskz_cvtsepi32_epi8() {
48673 let a = _mm_set_epi32(13, 14, 15, 16);
48674 let r = _mm_maskz_cvtsepi32_epi8(0, a);
48675 assert_eq_m128i(r, _mm_setzero_si128());
48676 let r = _mm_maskz_cvtsepi32_epi8(0b00001111, a);
48677 #[rustfmt::skip]
48678 let e = _mm_set_epi8(
48679 0, 0, 0, 0,
48680 0, 0, 0, 0,
48681 0, 0, 0, 0,
48682 13, 14, 15, 16,
48683 );
48684 assert_eq_m128i(r, e);
48685 }
48686
48687 #[simd_test(enable = "avx512f")]
48688 unsafe fn test_mm512_cvtusepi32_epi16() {
48689 #[rustfmt::skip]
48690 let a = _mm512_set_epi32(
48691 0, 1, 2, 3,
48692 4, 5, 6, 7,
48693 8, 9, 10, 11,
48694 12, 13, i32::MIN, i32::MIN,
48695 );
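        // Unsigned saturation (vpmovusdw): the i32::MIN bit pattern is read as a large
        // unsigned value and clamps to u16::MAX, which prints as -1 via the signed helper.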
48696 let r = _mm512_cvtusepi32_epi16(a);
48697 let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -1, -1);
48698 assert_eq_m256i(r, e);
48699 }
48700
48701 #[simd_test(enable = "avx512f")]
48702 unsafe fn test_mm512_mask_cvtusepi32_epi16() {
48703 #[rustfmt::skip]
48704 let a = _mm512_set_epi32(
48705 0, 1, 2, 3,
48706 4, 5, 6, 7,
48707 8, 9, 10, 11,
48708 12, 13, i32::MIN, i32::MIN,
48709 );
48710 let src = _mm256_set1_epi16(-1);
48711 let r = _mm512_mask_cvtusepi32_epi16(src, 0, a);
48712 assert_eq_m256i(r, src);
48713 let r = _mm512_mask_cvtusepi32_epi16(src, 0b00000000_11111111, a);
48714 let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, -1, -1);
48715 assert_eq_m256i(r, e);
48716 }
48717
48718 #[simd_test(enable = "avx512f")]
48719 unsafe fn test_mm512_maskz_cvtusepi32_epi16() {
48720 #[rustfmt::skip]
48721 let a = _mm512_set_epi32(
48722 0, 1, 2, 3,
48723 4, 5, 6, 7,
48724 8, 9, 10, 11,
48725 12, 13, i32::MIN, i32::MIN,
48726 );
48727 let r = _mm512_maskz_cvtusepi32_epi16(0, a);
48728 assert_eq_m256i(r, _mm256_setzero_si256());
48729 let r = _mm512_maskz_cvtusepi32_epi16(0b00000000_11111111, a);
48730 let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, -1, -1);
48731 assert_eq_m256i(r, e);
48732 }
48733
48734 #[simd_test(enable = "avx512f,avx512vl")]
48735 unsafe fn test_mm256_cvtusepi32_epi16() {
48736 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
48737 let r = _mm256_cvtusepi32_epi16(a);
48738 let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
48739 assert_eq_m128i(r, e);
48740 }
48741
48742 #[simd_test(enable = "avx512f,avx512vl")]
48743 unsafe fn test_mm256_mask_cvtusepi32_epi16() {
48744 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
48745 let src = _mm_set1_epi16(0);
48746 let r = _mm256_mask_cvtusepi32_epi16(src, 0, a);
48747 assert_eq_m128i(r, src);
48748 let r = _mm256_mask_cvtusepi32_epi16(src, 0b11111111, a);
48749 let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
48750 assert_eq_m128i(r, e);
48751 }
48752
48753 #[simd_test(enable = "avx512f,avx512vl")]
48754 unsafe fn test_mm256_maskz_cvtusepi32_epi16() {
48755 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
48756 let r = _mm256_maskz_cvtusepi32_epi16(0, a);
48757 assert_eq_m128i(r, _mm_setzero_si128());
48758 let r = _mm256_maskz_cvtusepi32_epi16(0b11111111, a);
48759 let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
48760 assert_eq_m128i(r, e);
48761 }
48762
48763 #[simd_test(enable = "avx512f,avx512vl")]
48764 unsafe fn test_mm_cvtusepi32_epi16() {
48765 let a = _mm_set_epi32(5, 6, 7, 8);
48766 let r = _mm_cvtusepi32_epi16(a);
48767 let e = _mm_set_epi16(0, 0, 0, 0, 5, 6, 7, 8);
48768 assert_eq_m128i(r, e);
48769 }
48770
48771 #[simd_test(enable = "avx512f,avx512vl")]
48772 unsafe fn test_mm_mask_cvtusepi32_epi16() {
48773 let a = _mm_set_epi32(5, 6, 7, 8);
48774 let src = _mm_set1_epi16(0);
48775 let r = _mm_mask_cvtusepi32_epi16(src, 0, a);
48776 assert_eq_m128i(r, src);
48777 let r = _mm_mask_cvtusepi32_epi16(src, 0b00001111, a);
48778 let e = _mm_set_epi16(0, 0, 0, 0, 5, 6, 7, 8);
48779 assert_eq_m128i(r, e);
48780 }
48781
48782 #[simd_test(enable = "avx512f,avx512vl")]
48783 unsafe fn test_mm_maskz_cvtusepi32_epi16() {
48784 let a = _mm_set_epi32(5, 6, 7, 8);
48785 let r = _mm_maskz_cvtusepi32_epi16(0, a);
48786 assert_eq_m128i(r, _mm_setzero_si128());
48787 let r = _mm_maskz_cvtusepi32_epi16(0b00001111, a);
48788 let e = _mm_set_epi16(0, 0, 0, 0, 5, 6, 7, 8);
48789 assert_eq_m128i(r, e);
48790 }
48791
48792 #[simd_test(enable = "avx512f")]
48793 unsafe fn test_mm512_cvtusepi32_epi8() {
48794 #[rustfmt::skip]
48795 let a = _mm512_set_epi32(
48796 0, 1, 2, 3,
48797 4, 5, 6, 7,
48798 8, 9, 10, 11,
48799 12, 13, i32::MIN, i32::MIN,
48800 );
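        // As above, but unsigned saturation to 8 bits: the out-of-range lanes clamp to
        // u8::MAX, shown as -1 through the signed i8 constructor.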
48801 let r = _mm512_cvtusepi32_epi8(a);
48802 let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -1, -1);
48803 assert_eq_m128i(r, e);
48804 }
48805
48806 #[simd_test(enable = "avx512f")]
48807 unsafe fn test_mm512_mask_cvtusepi32_epi8() {
48808 #[rustfmt::skip]
48809 let a = _mm512_set_epi32(
48810 0, 1, 2, 3,
48811 4, 5, 6, 7,
48812 8, 9, 10, 11,
48813 12, 13, i32::MIN, i32::MIN,
48814 );
48815 let src = _mm_set1_epi8(-1);
48816 let r = _mm512_mask_cvtusepi32_epi8(src, 0, a);
48817 assert_eq_m128i(r, src);
48818 let r = _mm512_mask_cvtusepi32_epi8(src, 0b00000000_11111111, a);
48819 let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, -1, -1);
48820 assert_eq_m128i(r, e);
48821 }
48822
48823 #[simd_test(enable = "avx512f")]
48824 unsafe fn test_mm512_maskz_cvtusepi32_epi8() {
48825 #[rustfmt::skip]
48826 let a = _mm512_set_epi32(
48827 0, 1, 2, 3,
48828 4, 5, 6, 7,
48829 8, 9, 10, 11,
48830 12, 13, i32::MIN, i32::MIN,
48831 );
48832 let r = _mm512_maskz_cvtusepi32_epi8(0, a);
48833 assert_eq_m128i(r, _mm_setzero_si128());
48834 let r = _mm512_maskz_cvtusepi32_epi8(0b00000000_11111111, a);
48835 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, -1, -1);
48836 assert_eq_m128i(r, e);
48837 }
48838
48839 #[simd_test(enable = "avx512f,avx512vl")]
48840 unsafe fn test_mm256_cvtusepi32_epi8() {
48841 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, i32::MAX);
48842 let r = _mm256_cvtusepi32_epi8(a);
48843 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, u8::MAX as i8);
48844 assert_eq_m128i(r, e);
48845 }
48846
48847 #[simd_test(enable = "avx512f,avx512vl")]
48848 unsafe fn test_mm256_mask_cvtusepi32_epi8() {
48849 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, i32::MAX);
48850 let src = _mm_set1_epi8(0);
48851 let r = _mm256_mask_cvtusepi32_epi8(src, 0, a);
48852 assert_eq_m128i(r, src);
48853 let r = _mm256_mask_cvtusepi32_epi8(src, 0b11111111, a);
48854 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, u8::MAX as i8);
48855 assert_eq_m128i(r, e);
48856 }
48857
48858 #[simd_test(enable = "avx512f,avx512vl")]
48859 unsafe fn test_mm256_maskz_cvtusepi32_epi8() {
48860 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, i32::MAX);
48861 let r = _mm256_maskz_cvtusepi32_epi8(0, a);
48862 assert_eq_m128i(r, _mm_setzero_si128());
48863 let r = _mm256_maskz_cvtusepi32_epi8(0b11111111, a);
48864 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, u8::MAX as i8);
48865 assert_eq_m128i(r, e);
48866 }
48867
48868 #[simd_test(enable = "avx512f,avx512vl")]
48869 unsafe fn test_mm_cvtusepi32_epi8() {
48870 let a = _mm_set_epi32(5, 6, 7, i32::MAX);
48871 let r = _mm_cvtusepi32_epi8(a);
48872 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 7, u8::MAX as i8);
48873 assert_eq_m128i(r, e);
48874 }
48875
48876 #[simd_test(enable = "avx512f,avx512vl")]
48877 unsafe fn test_mm_mask_cvtusepi32_epi8() {
48878 let a = _mm_set_epi32(5, 6, 7, i32::MAX);
48879 let src = _mm_set1_epi8(0);
48880 let r = _mm_mask_cvtusepi32_epi8(src, 0, a);
48881 assert_eq_m128i(r, src);
48882 let r = _mm_mask_cvtusepi32_epi8(src, 0b00001111, a);
48883 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 7, u8::MAX as i8);
48884 assert_eq_m128i(r, e);
48885 }
48886
48887 #[simd_test(enable = "avx512f,avx512vl")]
48888 unsafe fn test_mm_maskz_cvtusepi32_epi8() {
48889 let a = _mm_set_epi32(5, 6, 7, i32::MAX);
48890 let r = _mm_maskz_cvtusepi32_epi8(0, a);
48891 assert_eq_m128i(r, _mm_setzero_si128());
48892 let r = _mm_maskz_cvtusepi32_epi8(0b00001111, a);
48893 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 7, u8::MAX as i8);
48894 assert_eq_m128i(r, e);
48895 }
48896
48897 #[simd_test(enable = "avx512f")]
48898 unsafe fn test_mm512_cvt_roundps_epi32() {
48899 let a = _mm512_setr_ps(
48900 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
48901 );
48902 let r = _mm512_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
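        // Round-to-nearest-even sends the .5 cases to the even neighbour (-1.5 -> -2,
        // 9.5 -> 10); the second call below rounds toward negative infinity instead.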
48903 let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
48904 assert_eq_m512i(r, e);
48905 let r = _mm512_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a);
48906 let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 9, 10, 11, 12, 13, 14, 15);
48907 assert_eq_m512i(r, e);
48908 }
48909
48910 #[simd_test(enable = "avx512f")]
48911 unsafe fn test_mm512_mask_cvt_roundps_epi32() {
48912 let a = _mm512_setr_ps(
48913 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
48914 );
48915 let src = _mm512_set1_epi32(0);
48916 let r = _mm512_mask_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48917 src, 0, a,
48918 );
48919 assert_eq_m512i(r, src);
48920 let r = _mm512_mask_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48921 src,
48922 0b00000000_11111111,
48923 a,
48924 );
48925 let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
48926 assert_eq_m512i(r, e);
48927 }
48928
48929 #[simd_test(enable = "avx512f")]
48930 unsafe fn test_mm512_maskz_cvt_roundps_epi32() {
48931 let a = _mm512_setr_ps(
48932 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
48933 );
48934 let r = _mm512_maskz_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48935 0, a,
48936 );
48937 assert_eq_m512i(r, _mm512_setzero_si512());
48938 let r = _mm512_maskz_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48939 0b00000000_11111111,
48940 a,
48941 );
48942 let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
48943 assert_eq_m512i(r, e);
48944 }
48945
48946 #[simd_test(enable = "avx512f")]
48947 unsafe fn test_mm512_cvt_roundps_epu32() {
48948 let a = _mm512_setr_ps(
48949 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
48950 );
48951 let r = _mm512_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
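        // Negative lanes are out of range for the unsigned conversion and yield
        // u32::MAX (-1 here), whichever rounding mode is selected.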
48952 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 10, 10, 12, 12, 14, 14, 16);
48953 assert_eq_m512i(r, e);
48954 let r = _mm512_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a);
48955 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 9, 10, 11, 12, 13, 14, 15);
48956 assert_eq_m512i(r, e);
48957 }
48958
48959 #[simd_test(enable = "avx512f")]
48960 unsafe fn test_mm512_mask_cvt_roundps_epu32() {
48961 let a = _mm512_setr_ps(
48962 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
48963 );
48964 let src = _mm512_set1_epi32(0);
48965 let r = _mm512_mask_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48966 src, 0, a,
48967 );
48968 assert_eq_m512i(r, src);
48969 let r = _mm512_mask_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48970 src,
48971 0b00000000_11111111,
48972 a,
48973 );
48974 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
48975 assert_eq_m512i(r, e);
48976 }
48977
48978 #[simd_test(enable = "avx512f")]
48979 unsafe fn test_mm512_maskz_cvt_roundps_epu32() {
48980 let a = _mm512_setr_ps(
48981 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
48982 );
48983 let r = _mm512_maskz_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48984 0, a,
48985 );
48986 assert_eq_m512i(r, _mm512_setzero_si512());
48987 let r = _mm512_maskz_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48988 0b00000000_11111111,
48989 a,
48990 );
48991 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
48992 assert_eq_m512i(r, e);
48993 }
48994
48995 #[simd_test(enable = "avx512f")]
48996 unsafe fn test_mm512_cvt_roundepi32_ps() {
48997 let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
48998 let r = _mm512_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
48999 let e = _mm512_setr_ps(
49000 0., -2., 2., -4., 4., -6., 6., -8., 8., 10., 10., 12., 12., 14., 14., 16.,
49001 );
49002 assert_eq_m512(r, e);
49003 }
49004
49005 #[simd_test(enable = "avx512f")]
49006 unsafe fn test_mm512_mask_cvt_roundepi32_ps() {
49007 let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
49008 let src = _mm512_set1_ps(0.);
49009 let r = _mm512_mask_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49010 src, 0, a,
49011 );
49012 assert_eq_m512(r, src);
49013 let r = _mm512_mask_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49014 src,
49015 0b00000000_11111111,
49016 a,
49017 );
49018 let e = _mm512_setr_ps(
49019 0., -2., 2., -4., 4., -6., 6., -8., 0., 0., 0., 0., 0., 0., 0., 0.,
49020 );
49021 assert_eq_m512(r, e);
49022 }
49023
49024 #[simd_test(enable = "avx512f")]
49025 unsafe fn test_mm512_maskz_cvt_roundepi32_ps() {
49026 let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
49027 let r = _mm512_maskz_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49028 0, a,
49029 );
49030 assert_eq_m512(r, _mm512_setzero_ps());
49031 let r = _mm512_maskz_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49032 0b00000000_11111111,
49033 a,
49034 );
49035 let e = _mm512_setr_ps(
49036 0., -2., 2., -4., 4., -6., 6., -8., 0., 0., 0., 0., 0., 0., 0., 0.,
49037 );
49038 assert_eq_m512(r, e);
49039 }
49040
49041 #[simd_test(enable = "avx512f")]
49042 unsafe fn test_mm512_cvt_roundepu32_ps() {
49043 let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
49044 let r = _mm512_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
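        // The negative i32 patterns are read as large unsigned values, e.g. -2 becomes
        // 4_294_967_294, whose nearest f32 is 4_294_967_296.0; the literal 4294967300.
        // parses to that same f32.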
49045 #[rustfmt::skip]
49046 let e = _mm512_setr_ps(
49047 0., 4294967300., 2., 4294967300.,
49048 4., 4294967300., 6., 4294967300.,
49049 8., 10., 10., 12.,
49050 12., 14., 14., 16.,
49051 );
49052 assert_eq_m512(r, e);
49053 }
49054
49055 #[simd_test(enable = "avx512f")]
49056 unsafe fn test_mm512_mask_cvt_roundepu32_ps() {
49057 let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
49058 let src = _mm512_set1_ps(0.);
49059 let r = _mm512_mask_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49060 src, 0, a,
49061 );
49062 assert_eq_m512(r, src);
49063 let r = _mm512_mask_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49064 src,
49065 0b00000000_11111111,
49066 a,
49067 );
49068 #[rustfmt::skip]
49069 let e = _mm512_setr_ps(
49070 0., 4294967300., 2., 4294967300.,
49071 4., 4294967300., 6., 4294967300.,
49072 0., 0., 0., 0.,
49073 0., 0., 0., 0.,
49074 );
49075 assert_eq_m512(r, e);
49076 }
49077
49078 #[simd_test(enable = "avx512f")]
49079 unsafe fn test_mm512_maskz_cvt_roundepu32_ps() {
49080 let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
49081 let r = _mm512_maskz_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49082 0, a,
49083 );
49084 assert_eq_m512(r, _mm512_setzero_ps());
49085 let r = _mm512_maskz_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49086 0b00000000_11111111,
49087 a,
49088 );
49089 #[rustfmt::skip]
49090 let e = _mm512_setr_ps(
49091 0., 4294967300., 2., 4294967300.,
49092 4., 4294967300., 6., 4294967300.,
49093 0., 0., 0., 0.,
49094 0., 0., 0., 0.,
49095 );
49096 assert_eq_m512(r, e);
49097 }
49098
49099 #[simd_test(enable = "avx512f")]
49100 unsafe fn test_mm512_cvt_roundps_ph() {
49101 let a = _mm512_set1_ps(1.);
49102 let r = _mm512_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(a);
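        // 4323521613979991040 is 0x3C00_3C00_3C00_3C00: four packed f16 values of 1.0
        // (0x3C00) in each 64-bit lane.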
49103 let e = _mm256_setr_epi64x(
49104 4323521613979991040,
49105 4323521613979991040,
49106 4323521613979991040,
49107 4323521613979991040,
49108 );
49109 assert_eq_m256i(r, e);
49110 }
49111
49112 #[simd_test(enable = "avx512f")]
49113 unsafe fn test_mm512_mask_cvt_roundps_ph() {
49114 let a = _mm512_set1_ps(1.);
49115 let src = _mm256_set1_epi16(0);
49116 let r = _mm512_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
49117 assert_eq_m256i(r, src);
49118 let r = _mm512_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
49119 let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
49120 assert_eq_m256i(r, e);
49121 }
49122
49123 #[simd_test(enable = "avx512f")]
49124 unsafe fn test_mm512_maskz_cvt_roundps_ph() {
49125 let a = _mm512_set1_ps(1.);
49126 let r = _mm512_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0, a);
49127 assert_eq_m256i(r, _mm256_setzero_si256());
49128 let r = _mm512_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
49129 let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
49130 assert_eq_m256i(r, e);
49131 }
49132
49133 #[simd_test(enable = "avx512f,avx512vl")]
49134 unsafe fn test_mm256_mask_cvt_roundps_ph() {
49135 let a = _mm256_set1_ps(1.);
49136 let src = _mm_set1_epi16(0);
49137 let r = _mm256_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
49138 assert_eq_m128i(r, src);
49139 let r = _mm256_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0b11111111, a);
49140 let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49141 assert_eq_m128i(r, e);
49142 }
49143
49144 #[simd_test(enable = "avx512f,avx512vl")]
49145 unsafe fn test_mm256_maskz_cvt_roundps_ph() {
49146 let a = _mm256_set1_ps(1.);
49147 let r = _mm256_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0, a);
49148 assert_eq_m128i(r, _mm_setzero_si128());
49149 let r = _mm256_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0b11111111, a);
49150 let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49151 assert_eq_m128i(r, e);
49152 }
49153
49154 #[simd_test(enable = "avx512f,avx512vl")]
49155 unsafe fn test_mm_mask_cvt_roundps_ph() {
49156 let a = _mm_set1_ps(1.);
49157 let src = _mm_set1_epi16(0);
49158 let r = _mm_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
49159 assert_eq_m128i(r, src);
49160 let r = _mm_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0b00001111, a);
49161 let e = _mm_setr_epi64x(4323521613979991040, 0);
49162 assert_eq_m128i(r, e);
49163 }
49164
49165 #[simd_test(enable = "avx512f,avx512vl")]
49166 unsafe fn test_mm_maskz_cvt_roundps_ph() {
49167 let a = _mm_set1_ps(1.);
49168 let r = _mm_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0, a);
49169 assert_eq_m128i(r, _mm_setzero_si128());
49170 let r = _mm_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0b00001111, a);
49171 let e = _mm_setr_epi64x(4323521613979991040, 0);
49172 assert_eq_m128i(r, e);
49173 }
49174
49175 #[simd_test(enable = "avx512f")]
49176 unsafe fn test_mm512_cvtps_ph() {
49177 let a = _mm512_set1_ps(1.);
49178 let r = _mm512_cvtps_ph::<_MM_FROUND_NO_EXC>(a);
49179 let e = _mm256_setr_epi64x(
49180 4323521613979991040,
49181 4323521613979991040,
49182 4323521613979991040,
49183 4323521613979991040,
49184 );
49185 assert_eq_m256i(r, e);
49186 }
49187
49188 #[simd_test(enable = "avx512f")]
49189 unsafe fn test_mm512_mask_cvtps_ph() {
49190 let a = _mm512_set1_ps(1.);
49191 let src = _mm256_set1_epi16(0);
49192 let r = _mm512_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
49193 assert_eq_m256i(r, src);
49194 let r = _mm512_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
49195 let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
49196 assert_eq_m256i(r, e);
49197 }
49198
49199 #[simd_test(enable = "avx512f")]
49200 unsafe fn test_mm512_maskz_cvtps_ph() {
49201 let a = _mm512_set1_ps(1.);
49202 let r = _mm512_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0, a);
49203 assert_eq_m256i(r, _mm256_setzero_si256());
49204 let r = _mm512_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
49205 let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
49206 assert_eq_m256i(r, e);
49207 }
49208
49209 #[simd_test(enable = "avx512f,avx512vl")]
49210 unsafe fn test_mm256_mask_cvtps_ph() {
49211 let a = _mm256_set1_ps(1.);
49212 let src = _mm_set1_epi16(0);
49213 let r = _mm256_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
49214 assert_eq_m128i(r, src);
49215 let r = _mm256_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0b11111111, a);
49216 let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49217 assert_eq_m128i(r, e);
49218 }
49219
49220 #[simd_test(enable = "avx512f,avx512vl")]
49221 unsafe fn test_mm256_maskz_cvtps_ph() {
49222 let a = _mm256_set1_ps(1.);
49223 let r = _mm256_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0, a);
49224 assert_eq_m128i(r, _mm_setzero_si128());
49225 let r = _mm256_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0b11111111, a);
49226 let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49227 assert_eq_m128i(r, e);
49228 }
49229
49230 #[simd_test(enable = "avx512f,avx512vl")]
49231 unsafe fn test_mm_mask_cvtps_ph() {
49232 let a = _mm_set1_ps(1.);
49233 let src = _mm_set1_epi16(0);
49234 let r = _mm_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
49235 assert_eq_m128i(r, src);
49236 let r = _mm_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0b00001111, a);
49237 let e = _mm_setr_epi64x(4323521613979991040, 0);
49238 assert_eq_m128i(r, e);
49239 }
49240
49241 #[simd_test(enable = "avx512f,avx512vl")]
49242 unsafe fn test_mm_maskz_cvtps_ph() {
49243 let a = _mm_set1_ps(1.);
49244 let r = _mm_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0, a);
49245 assert_eq_m128i(r, _mm_setzero_si128());
49246 let r = _mm_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0b00001111, a);
49247 let e = _mm_setr_epi64x(4323521613979991040, 0);
49248 assert_eq_m128i(r, e);
49249 }
49250
49251 #[simd_test(enable = "avx512f")]
49252 unsafe fn test_mm512_cvt_roundph_ps() {
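        // The half-precision input reuses 0x3C00_3C00_3C00_3C00 per 64-bit lane,
        // i.e. sixteen f16 values of 1.0 in total.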
49253 let a = _mm256_setr_epi64x(
49254 4323521613979991040,
49255 4323521613979991040,
49256 4323521613979991040,
49257 4323521613979991040,
49258 );
49259 let r = _mm512_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(a);
49260 let e = _mm512_set1_ps(1.);
49261 assert_eq_m512(r, e);
49262 }
49263
49264 #[simd_test(enable = "avx512f")]
49265 unsafe fn test_mm512_mask_cvt_roundph_ps() {
49266 let a = _mm256_setr_epi64x(
49267 4323521613979991040,
49268 4323521613979991040,
49269 4323521613979991040,
49270 4323521613979991040,
49271 );
49272 let src = _mm512_set1_ps(0.);
49273 let r = _mm512_mask_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(src, 0, a);
49274 assert_eq_m512(r, src);
49275 let r = _mm512_mask_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
49276 let e = _mm512_setr_ps(
49277 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
49278 );
49279 assert_eq_m512(r, e);
49280 }
49281
49282 #[simd_test(enable = "avx512f")]
49283 unsafe fn test_mm512_maskz_cvt_roundph_ps() {
49284 let a = _mm256_setr_epi64x(
49285 4323521613979991040,
49286 4323521613979991040,
49287 4323521613979991040,
49288 4323521613979991040,
49289 );
49290 let r = _mm512_maskz_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(0, a);
49291 assert_eq_m512(r, _mm512_setzero_ps());
49292 let r = _mm512_maskz_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
49293 let e = _mm512_setr_ps(
49294 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
49295 );
49296 assert_eq_m512(r, e);
49297 }
49298
49299 #[simd_test(enable = "avx512f")]
49300 unsafe fn test_mm512_cvtph_ps() {
49301 let a = _mm256_setr_epi64x(
49302 4323521613979991040,
49303 4323521613979991040,
49304 4323521613979991040,
49305 4323521613979991040,
49306 );
49307 let r = _mm512_cvtph_ps(a);
49308 let e = _mm512_set1_ps(1.);
49309 assert_eq_m512(r, e);
49310 }
49311
49312 #[simd_test(enable = "avx512f")]
49313 unsafe fn test_mm512_mask_cvtph_ps() {
49314 let a = _mm256_setr_epi64x(
49315 4323521613979991040,
49316 4323521613979991040,
49317 4323521613979991040,
49318 4323521613979991040,
49319 );
49320 let src = _mm512_set1_ps(0.);
49321 let r = _mm512_mask_cvtph_ps(src, 0, a);
49322 assert_eq_m512(r, src);
49323 let r = _mm512_mask_cvtph_ps(src, 0b00000000_11111111, a);
49324 let e = _mm512_setr_ps(
49325 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
49326 );
49327 assert_eq_m512(r, e);
49328 }
49329
49330 #[simd_test(enable = "avx512f")]
49331 unsafe fn test_mm512_maskz_cvtph_ps() {
49332 let a = _mm256_setr_epi64x(
49333 4323521613979991040,
49334 4323521613979991040,
49335 4323521613979991040,
49336 4323521613979991040,
49337 );
49338 let r = _mm512_maskz_cvtph_ps(0, a);
49339 assert_eq_m512(r, _mm512_setzero_ps());
49340 let r = _mm512_maskz_cvtph_ps(0b00000000_11111111, a);
49341 let e = _mm512_setr_ps(
49342 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
49343 );
49344 assert_eq_m512(r, e);
49345 }
49346
49347 #[simd_test(enable = "avx512f,avx512vl")]
49348 unsafe fn test_mm256_mask_cvtph_ps() {
49349 let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49350 let src = _mm256_set1_ps(0.);
49351 let r = _mm256_mask_cvtph_ps(src, 0, a);
49352 assert_eq_m256(r, src);
49353 let r = _mm256_mask_cvtph_ps(src, 0b11111111, a);
49354 let e = _mm256_setr_ps(1., 1., 1., 1., 1., 1., 1., 1.);
49355 assert_eq_m256(r, e);
49356 }
49357
49358 #[simd_test(enable = "avx512f,avx512vl")]
49359 unsafe fn test_mm256_maskz_cvtph_ps() {
49360 let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49361 let r = _mm256_maskz_cvtph_ps(0, a);
49362 assert_eq_m256(r, _mm256_setzero_ps());
49363 let r = _mm256_maskz_cvtph_ps(0b11111111, a);
49364 let e = _mm256_setr_ps(1., 1., 1., 1., 1., 1., 1., 1.);
49365 assert_eq_m256(r, e);
49366 }
49367
49368 #[simd_test(enable = "avx512f,avx512vl")]
49369 unsafe fn test_mm_mask_cvtph_ps() {
49370 let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49371 let src = _mm_set1_ps(0.);
49372 let r = _mm_mask_cvtph_ps(src, 0, a);
49373 assert_eq_m128(r, src);
49374 let r = _mm_mask_cvtph_ps(src, 0b00001111, a);
49375 let e = _mm_setr_ps(1., 1., 1., 1.);
49376 assert_eq_m128(r, e);
49377 }
49378
49379 #[simd_test(enable = "avx512f,avx512vl")]
49380 unsafe fn test_mm_maskz_cvtph_ps() {
49381 let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49382 let r = _mm_maskz_cvtph_ps(0, a);
49383 assert_eq_m128(r, _mm_setzero_ps());
49384 let r = _mm_maskz_cvtph_ps(0b00001111, a);
49385 let e = _mm_setr_ps(1., 1., 1., 1.);
49386 assert_eq_m128(r, e);
49387 }
49388
49389 #[simd_test(enable = "avx512f")]
49390 unsafe fn test_mm512_cvtt_roundps_epi32() {
49391 let a = _mm512_setr_ps(
49392 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49393 );
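        // cvtt converts with truncation toward zero: -1.5 -> -1, 9.5 -> 9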
49394 let r = _mm512_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(a);
49395 let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 8, 9, 10, 11, 12, 13, 14, 15);
49396 assert_eq_m512i(r, e);
49397 }
49398
49399 #[simd_test(enable = "avx512f")]
49400 unsafe fn test_mm512_mask_cvtt_roundps_epi32() {
49401 let a = _mm512_setr_ps(
49402 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49403 );
49404 let src = _mm512_set1_epi32(0);
49405 let r = _mm512_mask_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(src, 0, a);
49406 assert_eq_m512i(r, src);
49407 let r = _mm512_mask_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
49408 let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
49409 assert_eq_m512i(r, e);
49410 }
49411
49412 #[simd_test(enable = "avx512f")]
49413 unsafe fn test_mm512_maskz_cvtt_roundps_epi32() {
49414 let a = _mm512_setr_ps(
49415 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49416 );
49417 let r = _mm512_maskz_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(0, a);
49418 assert_eq_m512i(r, _mm512_setzero_si512());
49419 let r = _mm512_maskz_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
49420 let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
49421 assert_eq_m512i(r, e);
49422 }
49423
49424 #[simd_test(enable = "avx512f")]
49425 unsafe fn test_mm512_cvtt_roundps_epu32() {
49426 let a = _mm512_setr_ps(
49427 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49428 );
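        // Negative inputs cannot be represented as u32; those lanes convert to 0xFFFFFFFF (shown as -1 below)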
49429 let r = _mm512_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(a);
49430 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 9, 10, 11, 12, 13, 14, 15);
49431 assert_eq_m512i(r, e);
49432 }
49433
49434 #[simd_test(enable = "avx512f")]
49435 unsafe fn test_mm512_mask_cvtt_roundps_epu32() {
49436 let a = _mm512_setr_ps(
49437 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49438 );
49439 let src = _mm512_set1_epi32(0);
49440 let r = _mm512_mask_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(src, 0, a);
49441 assert_eq_m512i(r, src);
49442 let r = _mm512_mask_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
49443 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
49444 assert_eq_m512i(r, e);
49445 }
49446
49447 #[simd_test(enable = "avx512f")]
49448 unsafe fn test_mm512_maskz_cvtt_roundps_epu32() {
49449 let a = _mm512_setr_ps(
49450 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49451 );
49452 let r = _mm512_maskz_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(0, a);
49453 assert_eq_m512i(r, _mm512_setzero_si512());
49454 let r = _mm512_maskz_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
49455 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
49456 assert_eq_m512i(r, e);
49457 }
49458
49459 #[simd_test(enable = "avx512f")]
49460 unsafe fn test_mm512_cvttps_epi32() {
49461 let a = _mm512_setr_ps(
49462 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49463 );
49464 let r = _mm512_cvttps_epi32(a);
49465 let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 8, 9, 10, 11, 12, 13, 14, 15);
49466 assert_eq_m512i(r, e);
49467 }
49468
49469 #[simd_test(enable = "avx512f")]
49470 unsafe fn test_mm512_mask_cvttps_epi32() {
49471 let a = _mm512_setr_ps(
49472 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49473 );
49474 let src = _mm512_set1_epi32(0);
49475 let r = _mm512_mask_cvttps_epi32(src, 0, a);
49476 assert_eq_m512i(r, src);
49477 let r = _mm512_mask_cvttps_epi32(src, 0b00000000_11111111, a);
49478 let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
49479 assert_eq_m512i(r, e);
49480 }
49481
49482 #[simd_test(enable = "avx512f")]
49483 unsafe fn test_mm512_maskz_cvttps_epi32() {
49484 let a = _mm512_setr_ps(
49485 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49486 );
49487 let r = _mm512_maskz_cvttps_epi32(0, a);
49488 assert_eq_m512i(r, _mm512_setzero_si512());
49489 let r = _mm512_maskz_cvttps_epi32(0b00000000_11111111, a);
49490 let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
49491 assert_eq_m512i(r, e);
49492 }
49493
49494 #[simd_test(enable = "avx512f,avx512vl")]
49495 unsafe fn test_mm256_mask_cvttps_epi32() {
49496 let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
49497 let src = _mm256_set1_epi32(0);
49498 let r = _mm256_mask_cvttps_epi32(src, 0, a);
49499 assert_eq_m256i(r, src);
49500 let r = _mm256_mask_cvttps_epi32(src, 0b11111111, a);
49501 let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
49502 assert_eq_m256i(r, e);
49503 }
49504
49505 #[simd_test(enable = "avx512f,avx512vl")]
49506 unsafe fn test_mm256_maskz_cvttps_epi32() {
49507 let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
49508 let r = _mm256_maskz_cvttps_epi32(0, a);
49509 assert_eq_m256i(r, _mm256_setzero_si256());
49510 let r = _mm256_maskz_cvttps_epi32(0b11111111, a);
49511 let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
49512 assert_eq_m256i(r, e);
49513 }
49514
49515 #[simd_test(enable = "avx512f,avx512vl")]
49516 unsafe fn test_mm_mask_cvttps_epi32() {
49517 let a = _mm_set_ps(12., 13.5, 14., 15.5);
49518 let src = _mm_set1_epi32(0);
49519 let r = _mm_mask_cvttps_epi32(src, 0, a);
49520 assert_eq_m128i(r, src);
49521 let r = _mm_mask_cvttps_epi32(src, 0b00001111, a);
49522 let e = _mm_set_epi32(12, 13, 14, 15);
49523 assert_eq_m128i(r, e);
49524 }
49525
49526 #[simd_test(enable = "avx512f,avx512vl")]
49527 unsafe fn test_mm_maskz_cvttps_epi32() {
49528 let a = _mm_set_ps(12., 13.5, 14., 15.5);
49529 let r = _mm_maskz_cvttps_epi32(0, a);
49530 assert_eq_m128i(r, _mm_setzero_si128());
49531 let r = _mm_maskz_cvttps_epi32(0b00001111, a);
49532 let e = _mm_set_epi32(12, 13, 14, 15);
49533 assert_eq_m128i(r, e);
49534 }
49535
49536 #[simd_test(enable = "avx512f")]
49537 unsafe fn test_mm512_cvttps_epu32() {
49538 let a = _mm512_setr_ps(
49539 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49540 );
49541 let r = _mm512_cvttps_epu32(a);
49542 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 9, 10, 11, 12, 13, 14, 15);
49543 assert_eq_m512i(r, e);
49544 }
49545
49546 #[simd_test(enable = "avx512f")]
49547 unsafe fn test_mm512_mask_cvttps_epu32() {
49548 let a = _mm512_setr_ps(
49549 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49550 );
49551 let src = _mm512_set1_epi32(0);
49552 let r = _mm512_mask_cvttps_epu32(src, 0, a);
49553 assert_eq_m512i(r, src);
49554 let r = _mm512_mask_cvttps_epu32(src, 0b00000000_11111111, a);
49555 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
49556 assert_eq_m512i(r, e);
49557 }
49558
49559 #[simd_test(enable = "avx512f")]
49560 unsafe fn test_mm512_maskz_cvttps_epu32() {
49561 let a = _mm512_setr_ps(
49562 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49563 );
49564 let r = _mm512_maskz_cvttps_epu32(0, a);
49565 assert_eq_m512i(r, _mm512_setzero_si512());
49566 let r = _mm512_maskz_cvttps_epu32(0b00000000_11111111, a);
49567 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
49568 assert_eq_m512i(r, e);
49569 }
49570
49571 #[simd_test(enable = "avx512f,avx512vl")]
49572 unsafe fn test_mm256_cvttps_epu32() {
49573 let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
49574 let r = _mm256_cvttps_epu32(a);
49575 let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
49576 assert_eq_m256i(r, e);
49577 }
49578
49579 #[simd_test(enable = "avx512f,avx512vl")]
49580 unsafe fn test_mm256_mask_cvttps_epu32() {
49581 let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
49582 let src = _mm256_set1_epi32(0);
49583 let r = _mm256_mask_cvttps_epu32(src, 0, a);
49584 assert_eq_m256i(r, src);
49585 let r = _mm256_mask_cvttps_epu32(src, 0b11111111, a);
49586 let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
49587 assert_eq_m256i(r, e);
49588 }
49589
49590 #[simd_test(enable = "avx512f,avx512vl")]
49591 unsafe fn test_mm256_maskz_cvttps_epu32() {
49592 let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
49593 let r = _mm256_maskz_cvttps_epu32(0, a);
49594 assert_eq_m256i(r, _mm256_setzero_si256());
49595 let r = _mm256_maskz_cvttps_epu32(0b11111111, a);
49596 let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
49597 assert_eq_m256i(r, e);
49598 }
49599
49600 #[simd_test(enable = "avx512f,avx512vl")]
49601 unsafe fn test_mm_cvttps_epu32() {
49602 let a = _mm_set_ps(12., 13.5, 14., 15.5);
49603 let r = _mm_cvttps_epu32(a);
49604 let e = _mm_set_epi32(12, 13, 14, 15);
49605 assert_eq_m128i(r, e);
49606 }
49607
49608 #[simd_test(enable = "avx512f,avx512vl")]
49609 unsafe fn test_mm_mask_cvttps_epu32() {
49610 let a = _mm_set_ps(12., 13.5, 14., 15.5);
49611 let src = _mm_set1_epi32(0);
49612 let r = _mm_mask_cvttps_epu32(src, 0, a);
49613 assert_eq_m128i(r, src);
49614 let r = _mm_mask_cvttps_epu32(src, 0b00001111, a);
49615 let e = _mm_set_epi32(12, 13, 14, 15);
49616 assert_eq_m128i(r, e);
49617 }
49618
49619 #[simd_test(enable = "avx512f,avx512vl")]
49620 unsafe fn test_mm_maskz_cvttps_epu32() {
49621 let a = _mm_set_ps(12., 13.5, 14., 15.5);
49622 let r = _mm_maskz_cvttps_epu32(0, a);
49623 assert_eq_m128i(r, _mm_setzero_si128());
49624 let r = _mm_maskz_cvttps_epu32(0b00001111, a);
49625 let e = _mm_set_epi32(12, 13, 14, 15);
49626 assert_eq_m128i(r, e);
49627 }
49628
49629 #[simd_test(enable = "avx512f")]
49630 unsafe fn test_mm512_i32gather_ps() {
49631 let arr: [f32; 256] = core::array::from_fn(|i| i as f32);
        // A scale of 4 makes each 32-bit index an element offset (4 bytes per element) rather than a byte offset
49633 #[rustfmt::skip]
49634 let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
49635 120, 128, 136, 144, 152, 160, 168, 176);
49636 let r = _mm512_i32gather_ps::<4>(index, arr.as_ptr() as *const u8);
49637 #[rustfmt::skip]
49638 assert_eq_m512(r, _mm512_setr_ps(0., 16., 32., 48., 64., 80., 96., 112.,
49639 120., 128., 136., 144., 152., 160., 168., 176.));
49640 }
49641
49642 #[simd_test(enable = "avx512f")]
49643 unsafe fn test_mm512_mask_i32gather_ps() {
49644 let arr: [f32; 256] = core::array::from_fn(|i| i as f32);
49645 let src = _mm512_set1_ps(2.);
49646 let mask = 0b10101010_10101010;
49647 #[rustfmt::skip]
49648 let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
49649 120, 128, 136, 144, 152, 160, 168, 176);
        // A scale of 4 makes each 32-bit index an element offset (4 bytes per element) rather than a byte offset
49651 let r = _mm512_mask_i32gather_ps::<4>(src, mask, index, arr.as_ptr() as *const u8);
49652 #[rustfmt::skip]
49653 assert_eq_m512(r, _mm512_setr_ps(2., 16., 2., 48., 2., 80., 2., 112.,
49654 2., 128., 2., 144., 2., 160., 2., 176.));
49655 }
49656
49657 #[simd_test(enable = "avx512f")]
49658 unsafe fn test_mm512_i32gather_epi32() {
49659 let arr: [i32; 256] = core::array::from_fn(|i| i as i32);
        // A scale of 4 makes each 32-bit index an element offset (4 bytes per element) rather than a byte offset
49661 #[rustfmt::skip]
49662 let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
49663 120, 128, 136, 144, 152, 160, 168, 176);
49664 let r = _mm512_i32gather_epi32::<4>(index, arr.as_ptr() as *const u8);
49665 #[rustfmt::skip]
49666 assert_eq_m512i(r, _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
49667 120, 128, 136, 144, 152, 160, 168, 176));
49668 }
49669
49670 #[simd_test(enable = "avx512f")]
49671 unsafe fn test_mm512_mask_i32gather_epi32() {
49672 let arr: [i32; 256] = core::array::from_fn(|i| i as i32);
49673 let src = _mm512_set1_epi32(2);
49674 let mask = 0b10101010_10101010;
49675 let index = _mm512_setr_epi32(
49676 0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240,
49677 );
        // A scale of 4 makes each 32-bit index an element offset (4 bytes per element) rather than a byte offset
49679 let r = _mm512_mask_i32gather_epi32::<4>(src, mask, index, arr.as_ptr() as *const u8);
49680 assert_eq_m512i(
49681 r,
49682 _mm512_setr_epi32(2, 16, 2, 48, 2, 80, 2, 112, 2, 144, 2, 176, 2, 208, 2, 240),
49683 );
49684 }
49685
49686 #[simd_test(enable = "avx512f")]
49687 unsafe fn test_mm512_i32scatter_ps() {
49688 let mut arr = [0f32; 256];
49689 #[rustfmt::skip]
49690 let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
49691 128, 144, 160, 176, 192, 208, 224, 240);
49692 let src = _mm512_setr_ps(
49693 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
49694 );
        // A scale of 4 makes each 32-bit index an element offset (4 bytes per element) rather than a byte offset
49696 _mm512_i32scatter_ps::<4>(arr.as_mut_ptr() as *mut u8, index, src);
49697 let mut expected = [0f32; 256];
49698 for i in 0..16 {
49699 expected[i * 16] = (i + 1) as f32;
49700 }
        assert_eq!(&arr[..], &expected[..]);
49702 }
49703
49704 #[simd_test(enable = "avx512f")]
49705 unsafe fn test_mm512_mask_i32scatter_ps() {
49706 let mut arr = [0f32; 256];
49707 let mask = 0b10101010_10101010;
49708 #[rustfmt::skip]
49709 let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
49710 128, 144, 160, 176, 192, 208, 224, 240);
49711 let src = _mm512_setr_ps(
49712 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
49713 );
        // A scale of 4 makes each 32-bit index an element offset (4 bytes per element) rather than a byte offset
49715 _mm512_mask_i32scatter_ps::<4>(arr.as_mut_ptr() as *mut u8, mask, index, src);
49716 let mut expected = [0f32; 256];
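        // Only the odd mask bits are set, so every second source value (2., 4., ..., 16.)
        // is written, landing 32 elements apart starting at index 16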
49717 for i in 0..8 {
49718 expected[i * 32 + 16] = 2. * (i + 1) as f32;
49719 }
        assert_eq!(&arr[..], &expected[..]);
49721 }
49722
49723 #[simd_test(enable = "avx512f")]
49724 unsafe fn test_mm512_i32scatter_epi32() {
49725 let mut arr = [0i32; 256];
49726 #[rustfmt::skip]
49728 let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
49729 128, 144, 160, 176, 192, 208, 224, 240);
49730 let src = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        // A scale of 4 makes each 32-bit index an element offset (4 bytes per element) rather than a byte offset
49732 _mm512_i32scatter_epi32::<4>(arr.as_mut_ptr() as *mut u8, index, src);
49733 let mut expected = [0i32; 256];
49734 for i in 0..16 {
49735 expected[i * 16] = (i + 1) as i32;
49736 }
        assert_eq!(&arr[..], &expected[..]);
49738 }
49739
49740 #[simd_test(enable = "avx512f")]
49741 unsafe fn test_mm512_mask_i32scatter_epi32() {
49742 let mut arr = [0i32; 256];
49743 let mask = 0b10101010_10101010;
49744 #[rustfmt::skip]
49745 let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
49746 128, 144, 160, 176, 192, 208, 224, 240);
49747 let src = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        // A scale of 4 makes each 32-bit index an element offset (4 bytes per element) rather than a byte offset
49749 _mm512_mask_i32scatter_epi32::<4>(arr.as_mut_ptr() as *mut u8, mask, index, src);
49750 let mut expected = [0i32; 256];
49751 for i in 0..8 {
49752 expected[i * 32 + 16] = 2 * (i + 1) as i32;
49753 }
        assert_eq!(&arr[..], &expected[..]);
49755 }
49756
49757 #[simd_test(enable = "avx512f")]
49758 unsafe fn test_mm512_cmplt_ps_mask() {
49759 #[rustfmt::skip]
49760 let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49761 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49762 let b = _mm512_set1_ps(-1.);
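        // NaN lanes never satisfy an ordered less-than, so only the f32::MIN and -100. lanes set their bits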
49763 let m = _mm512_cmplt_ps_mask(a, b);
49764 assert_eq!(m, 0b00000101_00000101);
49765 }
49766
49767 #[simd_test(enable = "avx512f")]
49768 unsafe fn test_mm512_mask_cmplt_ps_mask() {
49769 #[rustfmt::skip]
49770 let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49771 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49772 let b = _mm512_set1_ps(-1.);
49773 let mask = 0b01100110_01100110;
49774 let r = _mm512_mask_cmplt_ps_mask(mask, a, b);
49775 assert_eq!(r, 0b00000100_00000100);
49776 }
49777
49778 #[simd_test(enable = "avx512f")]
49779 unsafe fn test_mm512_cmpnlt_ps_mask() {
49780 #[rustfmt::skip]
49781 let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49782 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49783 let b = _mm512_set1_ps(-1.);
49784 assert_eq!(_mm512_cmpnlt_ps_mask(a, b), !_mm512_cmplt_ps_mask(a, b));
49785 }
49786
49787 #[simd_test(enable = "avx512f")]
49788 unsafe fn test_mm512_mask_cmpnlt_ps_mask() {
49789 #[rustfmt::skip]
49790 let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49791 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49792 let b = _mm512_set1_ps(-1.);
49793 let mask = 0b01111010_01111010;
49794 assert_eq!(_mm512_mask_cmpnlt_ps_mask(mask, a, b), 0b01111010_01111010);
49795 }
49796
49797 #[simd_test(enable = "avx512f")]
49798 unsafe fn test_mm512_cmpnle_ps_mask() {
49799 #[rustfmt::skip]
49800 let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49801 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49802 let b = _mm512_set1_ps(-1.);
49803 let m = _mm512_cmpnle_ps_mask(b, a);
49804 assert_eq!(m, 0b00001101_00001101);
49805 }
49806
49807 #[simd_test(enable = "avx512f")]
49808 unsafe fn test_mm512_mask_cmpnle_ps_mask() {
49809 #[rustfmt::skip]
49810 let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49811 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49812 let b = _mm512_set1_ps(-1.);
49813 let mask = 0b01100110_01100110;
49814 let r = _mm512_mask_cmpnle_ps_mask(mask, b, a);
49815 assert_eq!(r, 0b00000100_00000100);
49816 }
49817
49818 #[simd_test(enable = "avx512f")]
49819 unsafe fn test_mm512_cmple_ps_mask() {
49820 #[rustfmt::skip]
49821 let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49822 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49823 let b = _mm512_set1_ps(-1.);
49824 assert_eq!(_mm512_cmple_ps_mask(a, b), 0b00100101_00100101);
49825 }
49826
49827 #[simd_test(enable = "avx512f")]
49828 unsafe fn test_mm512_mask_cmple_ps_mask() {
49829 #[rustfmt::skip]
49830 let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49831 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49832 let b = _mm512_set1_ps(-1.);
49833 let mask = 0b01111010_01111010;
49834 assert_eq!(_mm512_mask_cmple_ps_mask(mask, a, b), 0b00100000_00100000);
49835 }
49836
49837 #[simd_test(enable = "avx512f")]
49838 unsafe fn test_mm512_cmpeq_ps_mask() {
49839 #[rustfmt::skip]
49840 let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
49841 0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
49842 #[rustfmt::skip]
49843 let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
49844 0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
49845 let m = _mm512_cmpeq_ps_mask(b, a);
49846 assert_eq!(m, 0b11001101_11001101);
49847 }
49848
49849 #[simd_test(enable = "avx512f")]
49850 unsafe fn test_mm512_mask_cmpeq_ps_mask() {
49851 #[rustfmt::skip]
49852 let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
49853 0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
49854 #[rustfmt::skip]
49855 let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
49856 0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
49857 let mask = 0b01111010_01111010;
49858 let r = _mm512_mask_cmpeq_ps_mask(mask, b, a);
49859 assert_eq!(r, 0b01001000_01001000);
49860 }
49861
49862 #[simd_test(enable = "avx512f")]
49863 unsafe fn test_mm512_cmpneq_ps_mask() {
49864 #[rustfmt::skip]
49865 let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
49866 0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
49867 #[rustfmt::skip]
49868 let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
49869 0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
49870 let m = _mm512_cmpneq_ps_mask(b, a);
49871 assert_eq!(m, 0b00110010_00110010);
49872 }
49873
49874 #[simd_test(enable = "avx512f")]
49875 unsafe fn test_mm512_mask_cmpneq_ps_mask() {
49876 #[rustfmt::skip]
49877 let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
49878 0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
49879 #[rustfmt::skip]
49880 let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
49881 0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
49882 let mask = 0b01111010_01111010;
49883 let r = _mm512_mask_cmpneq_ps_mask(mask, b, a);
49884 assert_eq!(r, 0b00110010_00110010)
49885 }
49886
49887 #[simd_test(enable = "avx512f")]
49888 unsafe fn test_mm512_cmp_ps_mask() {
49889 #[rustfmt::skip]
49890 let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
49891 0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
49892 let b = _mm512_set1_ps(-1.);
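        // _CMP_LT_OQ is an ordered, non-signaling (quiet) less-than predicate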
49893 let m = _mm512_cmp_ps_mask::<_CMP_LT_OQ>(a, b);
49894 assert_eq!(m, 0b00000101_00000101);
49895 }
49896
49897 #[simd_test(enable = "avx512f")]
49898 unsafe fn test_mm512_mask_cmp_ps_mask() {
49899 #[rustfmt::skip]
49900 let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
49901 0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
49902 let b = _mm512_set1_ps(-1.);
49903 let mask = 0b01100110_01100110;
49904 let r = _mm512_mask_cmp_ps_mask::<_CMP_LT_OQ>(mask, a, b);
49905 assert_eq!(r, 0b00000100_00000100);
49906 }
49907
49908 #[simd_test(enable = "avx512f,avx512vl")]
49909 unsafe fn test_mm256_cmp_ps_mask() {
49910 let a = _mm256_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
49911 let b = _mm256_set1_ps(-1.);
49912 let m = _mm256_cmp_ps_mask::<_CMP_LT_OQ>(a, b);
49913 assert_eq!(m, 0b00000101);
49914 }
49915
49916 #[simd_test(enable = "avx512f,avx512vl")]
49917 unsafe fn test_mm256_mask_cmp_ps_mask() {
49918 let a = _mm256_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
49919 let b = _mm256_set1_ps(-1.);
49920 let mask = 0b01100110;
49921 let r = _mm256_mask_cmp_ps_mask::<_CMP_LT_OQ>(mask, a, b);
49922 assert_eq!(r, 0b00000100);
49923 }
49924
49925 #[simd_test(enable = "avx512f,avx512vl")]
49926 unsafe fn test_mm_cmp_ps_mask() {
49927 let a = _mm_set_ps(0., 1., -1., 13.);
49928 let b = _mm_set1_ps(1.);
49929 let m = _mm_cmp_ps_mask::<_CMP_LT_OQ>(a, b);
49930 assert_eq!(m, 0b00001010);
49931 }
49932
49933 #[simd_test(enable = "avx512f,avx512vl")]
49934 unsafe fn test_mm_mask_cmp_ps_mask() {
49935 let a = _mm_set_ps(0., 1., -1., 13.);
49936 let b = _mm_set1_ps(1.);
49937 let mask = 0b11111111;
49938 let r = _mm_mask_cmp_ps_mask::<_CMP_LT_OQ>(mask, a, b);
49939 assert_eq!(r, 0b00001010);
49940 }
49941
49942 #[simd_test(enable = "avx512f")]
49943 unsafe fn test_mm512_cmp_round_ps_mask() {
49944 #[rustfmt::skip]
49945 let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
49946 0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
49947 let b = _mm512_set1_ps(-1.);
49948 let m = _mm512_cmp_round_ps_mask::<_CMP_LT_OQ, _MM_FROUND_CUR_DIRECTION>(a, b);
49949 assert_eq!(m, 0b00000101_00000101);
49950 }
49951
49952 #[simd_test(enable = "avx512f")]
49953 unsafe fn test_mm512_mask_cmp_round_ps_mask() {
49954 #[rustfmt::skip]
49955 let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
49956 0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
49957 let b = _mm512_set1_ps(-1.);
49958 let mask = 0b01100110_01100110;
49959 let r = _mm512_mask_cmp_round_ps_mask::<_CMP_LT_OQ, _MM_FROUND_CUR_DIRECTION>(mask, a, b);
49960 assert_eq!(r, 0b00000100_00000100);
49961 }
49962
49963 #[simd_test(enable = "avx512f")]
49964 unsafe fn test_mm512_cmpord_ps_mask() {
49965 #[rustfmt::skip]
49966 let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
49967 f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
49968 #[rustfmt::skip]
49969 let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
49970 f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
49971 let m = _mm512_cmpord_ps_mask(a, b);
49972 assert_eq!(m, 0b00000101_00000101);
49973 }
49974
49975 #[simd_test(enable = "avx512f")]
49976 unsafe fn test_mm512_mask_cmpord_ps_mask() {
49977 #[rustfmt::skip]
49978 let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
49979 f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
49980 #[rustfmt::skip]
49981 let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
49982 f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
49983 let mask = 0b11000011_11000011;
49984 let m = _mm512_mask_cmpord_ps_mask(mask, a, b);
49985 assert_eq!(m, 0b00000001_00000001);
49986 }
49987
49988 #[simd_test(enable = "avx512f")]
49989 unsafe fn test_mm512_cmpunord_ps_mask() {
49990 #[rustfmt::skip]
49991 let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
49992 f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
49993 #[rustfmt::skip]
49994 let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
49995 f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
49996 let m = _mm512_cmpunord_ps_mask(a, b);
49997
49998 assert_eq!(m, 0b11111010_11111010);
49999 }
50000
50001 #[simd_test(enable = "avx512f")]
50002 unsafe fn test_mm512_mask_cmpunord_ps_mask() {
50003 #[rustfmt::skip]
50004 let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
50005 f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
50006 #[rustfmt::skip]
50007 let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
50008 f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
50009 let mask = 0b00001111_00001111;
50010 let m = _mm512_mask_cmpunord_ps_mask(mask, a, b);
        assert_eq!(m, 0b00001010_00001010);
50012 }
50013
50014 #[simd_test(enable = "avx512f")]
50015 unsafe fn test_mm_cmp_ss_mask() {
50016 let a = _mm_setr_ps(2., 1., 1., 1.);
50017 let b = _mm_setr_ps(1., 2., 2., 2.);
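        // Scalar (ss) compares only test lane 0, so the resulting mask is either 0 or 1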
50018 let m = _mm_cmp_ss_mask::<_CMP_GE_OS>(a, b);
50019 assert_eq!(m, 1);
50020 }
50021
50022 #[simd_test(enable = "avx512f")]
50023 unsafe fn test_mm_mask_cmp_ss_mask() {
50024 let a = _mm_setr_ps(2., 1., 1., 1.);
50025 let b = _mm_setr_ps(1., 2., 2., 2.);
50026 let m = _mm_mask_cmp_ss_mask::<_CMP_GE_OS>(0b10, a, b);
50027 assert_eq!(m, 0);
50028 let m = _mm_mask_cmp_ss_mask::<_CMP_GE_OS>(0b1, a, b);
50029 assert_eq!(m, 1);
50030 }
50031
50032 #[simd_test(enable = "avx512f")]
50033 unsafe fn test_mm_cmp_round_ss_mask() {
50034 let a = _mm_setr_ps(2., 1., 1., 1.);
50035 let b = _mm_setr_ps(1., 2., 2., 2.);
50036 let m = _mm_cmp_round_ss_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(a, b);
50037 assert_eq!(m, 1);
50038 }
50039
50040 #[simd_test(enable = "avx512f")]
50041 unsafe fn test_mm_mask_cmp_round_ss_mask() {
50042 let a = _mm_setr_ps(2., 1., 1., 1.);
50043 let b = _mm_setr_ps(1., 2., 2., 2.);
50044 let m = _mm_mask_cmp_round_ss_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(0b10, a, b);
50045 assert_eq!(m, 0);
50046 let m = _mm_mask_cmp_round_ss_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(0b1, a, b);
50047 assert_eq!(m, 1);
50048 }
50049
50050 #[simd_test(enable = "avx512f")]
50051 unsafe fn test_mm_cmp_sd_mask() {
50052 let a = _mm_setr_pd(2., 1.);
50053 let b = _mm_setr_pd(1., 2.);
50054 let m = _mm_cmp_sd_mask::<_CMP_GE_OS>(a, b);
50055 assert_eq!(m, 1);
50056 }
50057
50058 #[simd_test(enable = "avx512f")]
50059 unsafe fn test_mm_mask_cmp_sd_mask() {
50060 let a = _mm_setr_pd(2., 1.);
50061 let b = _mm_setr_pd(1., 2.);
50062 let m = _mm_mask_cmp_sd_mask::<_CMP_GE_OS>(0b10, a, b);
50063 assert_eq!(m, 0);
50064 let m = _mm_mask_cmp_sd_mask::<_CMP_GE_OS>(0b1, a, b);
50065 assert_eq!(m, 1);
50066 }
50067
50068 #[simd_test(enable = "avx512f")]
50069 unsafe fn test_mm_cmp_round_sd_mask() {
50070 let a = _mm_setr_pd(2., 1.);
50071 let b = _mm_setr_pd(1., 2.);
50072 let m = _mm_cmp_round_sd_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(a, b);
50073 assert_eq!(m, 1);
50074 }
50075
50076 #[simd_test(enable = "avx512f")]
50077 unsafe fn test_mm_mask_cmp_round_sd_mask() {
50078 let a = _mm_setr_pd(2., 1.);
50079 let b = _mm_setr_pd(1., 2.);
50080 let m = _mm_mask_cmp_round_sd_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(0b10, a, b);
50081 assert_eq!(m, 0);
50082 let m = _mm_mask_cmp_round_sd_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(0b1, a, b);
50083 assert_eq!(m, 1);
50084 }
50085
50086 #[simd_test(enable = "avx512f")]
50087 unsafe fn test_mm512_cmplt_epu32_mask() {
50088 #[rustfmt::skip]
50089 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50090 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50091 let b = _mm512_set1_epi32(-1);
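        // Compared as unsigned, b (-1) is u32::MAX, so every lane is less except those holding -1 / u32::MAX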
50092 let m = _mm512_cmplt_epu32_mask(a, b);
50093 assert_eq!(m, 0b11001111_11001111);
50094 }
50095
50096 #[simd_test(enable = "avx512f")]
50097 unsafe fn test_mm512_mask_cmplt_epu32_mask() {
50098 #[rustfmt::skip]
50099 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50100 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50101 let b = _mm512_set1_epi32(-1);
50102 let mask = 0b01111010_01111010;
50103 let r = _mm512_mask_cmplt_epu32_mask(mask, a, b);
50104 assert_eq!(r, 0b01001010_01001010);
50105 }
50106
50107 #[simd_test(enable = "avx512f,avx512vl")]
50108 unsafe fn test_mm256_cmplt_epu32_mask() {
50109 let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 101, 100, 99);
50110 let b = _mm256_set1_epi32(1);
50111 let r = _mm256_cmplt_epu32_mask(a, b);
50112 assert_eq!(r, 0b10000000);
50113 }
50114
50115 #[simd_test(enable = "avx512f,avx512vl")]
50116 unsafe fn test_mm256_mask_cmplt_epu32_mask() {
50117 let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 101, 100, 99);
50118 let b = _mm256_set1_epi32(1);
50119 let mask = 0b11111111;
50120 let r = _mm256_mask_cmplt_epu32_mask(mask, a, b);
50121 assert_eq!(r, 0b10000000);
50122 }
50123
50124 #[simd_test(enable = "avx512f,avx512vl")]
50125 unsafe fn test_mm_cmplt_epu32_mask() {
50126 let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50127 let b = _mm_set1_epi32(1);
50128 let r = _mm_cmplt_epu32_mask(a, b);
50129 assert_eq!(r, 0b00001000);
50130 }
50131
50132 #[simd_test(enable = "avx512f,avx512vl")]
50133 unsafe fn test_mm_mask_cmplt_epu32_mask() {
50134 let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50135 let b = _mm_set1_epi32(1);
50136 let mask = 0b11111111;
50137 let r = _mm_mask_cmplt_epu32_mask(mask, a, b);
50138 assert_eq!(r, 0b00001000);
50139 }
50140
50141 #[simd_test(enable = "avx512f")]
50142 unsafe fn test_mm512_cmpgt_epu32_mask() {
50143 #[rustfmt::skip]
50144 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50145 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50146 let b = _mm512_set1_epi32(-1);
50147 let m = _mm512_cmpgt_epu32_mask(b, a);
50148 assert_eq!(m, 0b11001111_11001111);
50149 }
50150
50151 #[simd_test(enable = "avx512f")]
50152 unsafe fn test_mm512_mask_cmpgt_epu32_mask() {
50153 #[rustfmt::skip]
50154 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50155 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50156 let b = _mm512_set1_epi32(-1);
50157 let mask = 0b01111010_01111010;
50158 let r = _mm512_mask_cmpgt_epu32_mask(mask, b, a);
50159 assert_eq!(r, 0b01001010_01001010);
50160 }
50161
50162 #[simd_test(enable = "avx512f,avx512vl")]
50163 unsafe fn test_mm256_cmpgt_epu32_mask() {
50164 let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 99, 100, 101);
50165 let b = _mm256_set1_epi32(1);
50166 let r = _mm256_cmpgt_epu32_mask(a, b);
50167 assert_eq!(r, 0b00111111);
50168 }
50169
50170 #[simd_test(enable = "avx512f,avx512vl")]
50171 unsafe fn test_mm256_mask_cmpgt_epu32_mask() {
50172 let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 99, 100, 101);
50173 let b = _mm256_set1_epi32(1);
50174 let mask = 0b11111111;
50175 let r = _mm256_mask_cmpgt_epu32_mask(mask, a, b);
50176 assert_eq!(r, 0b00111111);
50177 }
50178
50179 #[simd_test(enable = "avx512f,avx512vl")]
50180 unsafe fn test_mm_cmpgt_epu32_mask() {
50181 let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50182 let b = _mm_set1_epi32(1);
50183 let r = _mm_cmpgt_epu32_mask(a, b);
50184 assert_eq!(r, 0b00000011);
50185 }
50186
50187 #[simd_test(enable = "avx512f,avx512vl")]
50188 unsafe fn test_mm_mask_cmpgt_epu32_mask() {
50189 let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50190 let b = _mm_set1_epi32(1);
50191 let mask = 0b11111111;
50192 let r = _mm_mask_cmpgt_epu32_mask(mask, a, b);
50193 assert_eq!(r, 0b00000011);
50194 }
50195
50196 #[simd_test(enable = "avx512f")]
50197 unsafe fn test_mm512_cmple_epu32_mask() {
50198 #[rustfmt::skip]
50199 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50200 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50201 let b = _mm512_set1_epi32(-1);
50202 assert_eq!(
50203 _mm512_cmple_epu32_mask(a, b),
50204 !_mm512_cmpgt_epu32_mask(a, b)
50205 )
50206 }
50207
50208 #[simd_test(enable = "avx512f")]
50209 unsafe fn test_mm512_mask_cmple_epu32_mask() {
50210 #[rustfmt::skip]
50211 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50212 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50213 let b = _mm512_set1_epi32(-1);
50214 let mask = 0b01111010_01111010;
50215 assert_eq!(
50216 _mm512_mask_cmple_epu32_mask(mask, a, b),
50217 0b01111010_01111010
50218 );
50219 }
50220
50221 #[simd_test(enable = "avx512f,avx512vl")]
50222 unsafe fn test_mm256_cmple_epu32_mask() {
50223 let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 200, 100, 101);
50224 let b = _mm256_set1_epi32(1);
50225 let r = _mm256_cmple_epu32_mask(a, b);
50226 assert_eq!(r, 0b11000000)
50227 }
50228
50229 #[simd_test(enable = "avx512f,avx512vl")]
50230 unsafe fn test_mm256_mask_cmple_epu32_mask() {
50231 let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 200, 100, 101);
50232 let b = _mm256_set1_epi32(1);
50233 let mask = 0b11111111;
50234 let r = _mm256_mask_cmple_epu32_mask(mask, a, b);
50235 assert_eq!(r, 0b11000000)
50236 }
50237
50238 #[simd_test(enable = "avx512f,avx512vl")]
50239 unsafe fn test_mm_cmple_epu32_mask() {
50240 let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50241 let b = _mm_set1_epi32(1);
50242 let r = _mm_cmple_epu32_mask(a, b);
50243 assert_eq!(r, 0b00001100)
50244 }
50245
50246 #[simd_test(enable = "avx512f,avx512vl")]
50247 unsafe fn test_mm_mask_cmple_epu32_mask() {
50248 let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50249 let b = _mm_set1_epi32(1);
50250 let mask = 0b11111111;
50251 let r = _mm_mask_cmple_epu32_mask(mask, a, b);
50252 assert_eq!(r, 0b00001100)
50253 }
50254
50255 #[simd_test(enable = "avx512f")]
50256 unsafe fn test_mm512_cmpge_epu32_mask() {
50257 #[rustfmt::skip]
50258 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50259 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50260 let b = _mm512_set1_epi32(-1);
50261 assert_eq!(
50262 _mm512_cmpge_epu32_mask(a, b),
50263 !_mm512_cmplt_epu32_mask(a, b)
50264 )
50265 }
50266
50267 #[simd_test(enable = "avx512f")]
50268 unsafe fn test_mm512_mask_cmpge_epu32_mask() {
50269 #[rustfmt::skip]
50270 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50271 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50272 let b = _mm512_set1_epi32(-1);
50273 let mask = 0b01111010_01111010;
        assert_eq!(_mm512_mask_cmpge_epu32_mask(mask, a, b), 0b00110000_00110000);
50275 }
50276
50277 #[simd_test(enable = "avx512f,avx512vl")]
50278 unsafe fn test_mm256_cmpge_epu32_mask() {
50279 let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 300, 100, 200);
50280 let b = _mm256_set1_epi32(1);
50281 let r = _mm256_cmpge_epu32_mask(a, b);
50282 assert_eq!(r, 0b01111111)
50283 }
50284
50285 #[simd_test(enable = "avx512f,avx512vl")]
50286 unsafe fn test_mm256_mask_cmpge_epu32_mask() {
50287 let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 300, 100, 200);
50288 let b = _mm256_set1_epi32(1);
50289 let mask = 0b11111111;
50290 let r = _mm256_mask_cmpge_epu32_mask(mask, a, b);
50291 assert_eq!(r, 0b01111111)
50292 }
50293
50294 #[simd_test(enable = "avx512f,avx512vl")]
50295 unsafe fn test_mm_cmpge_epu32_mask() {
50296 let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50297 let b = _mm_set1_epi32(1);
50298 let r = _mm_cmpge_epu32_mask(a, b);
50299 assert_eq!(r, 0b00000111)
50300 }
50301
50302 #[simd_test(enable = "avx512f,avx512vl")]
50303 unsafe fn test_mm_mask_cmpge_epu32_mask() {
50304 let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50305 let b = _mm_set1_epi32(1);
50306 let mask = 0b11111111;
50307 let r = _mm_mask_cmpge_epu32_mask(mask, a, b);
50308 assert_eq!(r, 0b00000111)
50309 }
50310
50311 #[simd_test(enable = "avx512f")]
50312 unsafe fn test_mm512_cmpeq_epu32_mask() {
50313 #[rustfmt::skip]
50314 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50315 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50316 #[rustfmt::skip]
50317 let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
50318 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50319 let m = _mm512_cmpeq_epu32_mask(b, a);
50320 assert_eq!(m, 0b11001111_11001111);
50321 }
50322
50323 #[simd_test(enable = "avx512f")]
50324 unsafe fn test_mm512_mask_cmpeq_epu32_mask() {
50325 #[rustfmt::skip]
50326 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50327 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50328 #[rustfmt::skip]
50329 let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
50330 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50331 let mask = 0b01111010_01111010;
50332 let r = _mm512_mask_cmpeq_epu32_mask(mask, b, a);
50333 assert_eq!(r, 0b01001010_01001010);
50334 }
50335
50336 #[simd_test(enable = "avx512f,avx512vl")]
50337 unsafe fn test_mm256_cmpeq_epu32_mask() {
50338 let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50339 let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50340 let m = _mm256_cmpeq_epu32_mask(b, a);
50341 assert_eq!(m, 0b11001111);
50342 }
50343
50344 #[simd_test(enable = "avx512f,avx512vl")]
50345 unsafe fn test_mm256_mask_cmpeq_epu32_mask() {
50346 let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50347 let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50348 let mask = 0b01111010;
50349 let r = _mm256_mask_cmpeq_epu32_mask(mask, b, a);
50350 assert_eq!(r, 0b01001010);
50351 }
50352
50353 #[simd_test(enable = "avx512f,avx512vl")]
50354 unsafe fn test_mm_cmpeq_epu32_mask() {
50355 let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
50356 let b = _mm_set_epi32(0, 1, 13, 42);
50357 let m = _mm_cmpeq_epu32_mask(b, a);
50358 assert_eq!(m, 0b00001100);
50359 }
50360
50361 #[simd_test(enable = "avx512f,avx512vl")]
50362 unsafe fn test_mm_mask_cmpeq_epu32_mask() {
50363 let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
50364 let b = _mm_set_epi32(0, 1, 13, 42);
50365 let mask = 0b11111111;
50366 let r = _mm_mask_cmpeq_epu32_mask(mask, b, a);
50367 assert_eq!(r, 0b00001100);
50368 }
50369
50370 #[simd_test(enable = "avx512f")]
50371 unsafe fn test_mm512_cmpneq_epu32_mask() {
50372 #[rustfmt::skip]
50373 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50374 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50375 #[rustfmt::skip]
50376 let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
50377 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50378 let m = _mm512_cmpneq_epu32_mask(b, a);
50379 assert_eq!(m, !_mm512_cmpeq_epu32_mask(b, a));
50380 }
50381
50382 #[simd_test(enable = "avx512f")]
50383 unsafe fn test_mm512_mask_cmpneq_epu32_mask() {
50384 #[rustfmt::skip]
50385 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100,
50386 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100);
50387 #[rustfmt::skip]
50388 let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
50389 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50390 let mask = 0b01111010_01111010;
50391 let r = _mm512_mask_cmpneq_epu32_mask(mask, b, a);
50392 assert_eq!(r, 0b00110010_00110010);
50393 }
50394
50395 #[simd_test(enable = "avx512f,avx512vl")]
50396 unsafe fn test_mm256_cmpneq_epu32_mask() {
50397 let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100);
50398 let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, -100, 100);
50399 let r = _mm256_cmpneq_epu32_mask(b, a);
50400 assert_eq!(r, 0b00110000);
50401 }
50402
50403 #[simd_test(enable = "avx512f,avx512vl")]
50404 unsafe fn test_mm256_mask_cmpneq_epu32_mask() {
50405 let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100);
50406 let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, -100, 100);
50407 let mask = 0b11111111;
50408 let r = _mm256_mask_cmpneq_epu32_mask(mask, b, a);
50409 assert_eq!(r, 0b00110000);
50410 }
50411
50412 #[simd_test(enable = "avx512f,avx512vl")]
50413 unsafe fn test_mm_cmpneq_epu32_mask() {
50414 let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
50415 let b = _mm_set_epi32(0, 1, 13, 42);
50416 let r = _mm_cmpneq_epu32_mask(b, a);
50417 assert_eq!(r, 0b00000011);
50418 }
50419
50420 #[simd_test(enable = "avx512f,avx512vl")]
50421 unsafe fn test_mm_mask_cmpneq_epu32_mask() {
50422 let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
50423 let b = _mm_set_epi32(0, 1, 13, 42);
50424 let mask = 0b11111111;
50425 let r = _mm_mask_cmpneq_epu32_mask(mask, b, a);
50426 assert_eq!(r, 0b00000011);
50427 }
50428
50429 #[simd_test(enable = "avx512f")]
50430 unsafe fn test_mm512_cmp_epu32_mask() {
50431 #[rustfmt::skip]
50432 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50433 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50434 let b = _mm512_set1_epi32(-1);
50435 let m = _mm512_cmp_epu32_mask::<_MM_CMPINT_LT>(a, b);
50436 assert_eq!(m, 0b11001111_11001111);
50437 }
50438
50439 #[simd_test(enable = "avx512f")]
50440 unsafe fn test_mm512_mask_cmp_epu32_mask() {
50441 #[rustfmt::skip]
50442 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50443 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50444 let b = _mm512_set1_epi32(-1);
50445 let mask = 0b01111010_01111010;
50446 let r = _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(mask, a, b);
50447 assert_eq!(r, 0b01001010_01001010);
50448 }
50449
50450 #[simd_test(enable = "avx512f,avx512vl")]
50451 unsafe fn test_mm256_cmp_epu32_mask() {
50452 let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50453 let b = _mm256_set1_epi32(-1);
50454 let m = _mm256_cmp_epu32_mask::<_MM_CMPINT_LT>(a, b);
50455 assert_eq!(m, 0b11001111);
50456 }
50457
50458 #[simd_test(enable = "avx512f,avx512vl")]
50459 unsafe fn test_mm256_mask_cmp_epu32_mask() {
50460 let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50461 let b = _mm256_set1_epi32(-1);
50462 let mask = 0b11111111;
50463 let r = _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(mask, a, b);
50464 assert_eq!(r, 0b11001111);
50465 }
50466
50467 #[simd_test(enable = "avx512f,avx512vl")]
50468 unsafe fn test_mm_cmp_epu32_mask() {
50469 let a = _mm_set_epi32(0, 1, -1, i32::MAX);
50470 let b = _mm_set1_epi32(1);
50471 let m = _mm_cmp_epu32_mask::<_MM_CMPINT_LT>(a, b);
50472 assert_eq!(m, 0b00001000);
50473 }
50474
50475 #[simd_test(enable = "avx512f,avx512vl")]
50476 unsafe fn test_mm_mask_cmp_epu32_mask() {
50477 let a = _mm_set_epi32(0, 1, -1, i32::MAX);
50478 let b = _mm_set1_epi32(1);
50479 let mask = 0b11111111;
50480 let r = _mm_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(mask, a, b);
50481 assert_eq!(r, 0b00001000);
50482 }
50483
50484 #[simd_test(enable = "avx512f")]
50485 unsafe fn test_mm512_cmplt_epi32_mask() {
50486 #[rustfmt::skip]
50487 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50488 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50489 let b = _mm512_set1_epi32(-1);
50490 let m = _mm512_cmplt_epi32_mask(a, b);
50491 assert_eq!(m, 0b00000101_00000101);
50492 }
50493
50494 #[simd_test(enable = "avx512f")]
50495 unsafe fn test_mm512_mask_cmplt_epi32_mask() {
50496 #[rustfmt::skip]
50497 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50498 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50499 let b = _mm512_set1_epi32(-1);
50500 let mask = 0b01100110_01100110;
50501 let r = _mm512_mask_cmplt_epi32_mask(mask, a, b);
50502 assert_eq!(r, 0b00000100_00000100);
50503 }
50504
50505 #[simd_test(enable = "avx512f,avx512vl")]
50506 unsafe fn test_mm256_cmplt_epi32_mask() {
50507 let a = _mm256_set_epi32(0, 1, -1, 101, i32::MAX, i32::MIN, 100, -100);
50508 let b = _mm256_set1_epi32(-1);
50509 let r = _mm256_cmplt_epi32_mask(a, b);
50510 assert_eq!(r, 0b00000101);
50511 }
50512
50513 #[simd_test(enable = "avx512f,avx512vl")]
50514 unsafe fn test_mm256_mask_cmplt_epi32_mask() {
50515 let a = _mm256_set_epi32(0, 1, -1, 101, i32::MAX, i32::MIN, 100, -100);
50516 let b = _mm256_set1_epi32(-1);
50517 let mask = 0b11111111;
50518 let r = _mm256_mask_cmplt_epi32_mask(mask, a, b);
50519 assert_eq!(r, 0b00000101);
50520 }
50521
50522 #[simd_test(enable = "avx512f,avx512vl")]
50523 unsafe fn test_mm_cmplt_epi32_mask() {
50524 let a = _mm_set_epi32(i32::MAX, i32::MIN, 100, -100);
50525 let b = _mm_set1_epi32(-1);
50526 let r = _mm_cmplt_epi32_mask(a, b);
50527 assert_eq!(r, 0b00000101);
50528 }
50529
50530 #[simd_test(enable = "avx512f,avx512vl")]
50531 unsafe fn test_mm_mask_cmplt_epi32_mask() {
50532 let a = _mm_set_epi32(i32::MAX, i32::MIN, 100, -100);
50533 let b = _mm_set1_epi32(-1);
50534 let mask = 0b11111111;
50535 let r = _mm_mask_cmplt_epi32_mask(mask, a, b);
50536 assert_eq!(r, 0b00000101);
50537 }
50538
50539 #[simd_test(enable = "avx512f")]
50540 unsafe fn test_mm512_cmpgt_epi32_mask() {
50541 #[rustfmt::skip]
50542 let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
50543 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50544 let b = _mm512_set1_epi32(-1);
50545 let m = _mm512_cmpgt_epi32_mask(b, a);
50546 assert_eq!(m, 0b00000101_00000101);
50547 }
50548
50549 #[simd_test(enable = "avx512f")]
50550 unsafe fn test_mm512_mask_cmpgt_epi32_mask() {
50551 #[rustfmt::skip]
50552 let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
50553 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50554 let b = _mm512_set1_epi32(-1);
50555 let mask = 0b01100110_01100110;
50556 let r = _mm512_mask_cmpgt_epi32_mask(mask, b, a);
50557 assert_eq!(r, 0b00000100_00000100);
50558 }
50559
50560 #[simd_test(enable = "avx512f,avx512vl")]
50561 unsafe fn test_mm256_cmpgt_epi32_mask() {
50562 let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50563 let b = _mm256_set1_epi32(-1);
50564 let r = _mm256_cmpgt_epi32_mask(a, b);
50565 assert_eq!(r, 0b11011010);
50566 }
50567
50568 #[simd_test(enable = "avx512f,avx512vl")]
50569 unsafe fn test_mm256_mask_cmpgt_epi32_mask() {
50570 let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50571 let b = _mm256_set1_epi32(-1);
50572 let mask = 0b11111111;
50573 let r = _mm256_mask_cmpgt_epi32_mask(mask, a, b);
50574 assert_eq!(r, 0b11011010);
50575 }
50576
50577 #[simd_test(enable = "avx512f,avx512vl")]
50578 unsafe fn test_mm_cmpgt_epi32_mask() {
50579 let a = _mm_set_epi32(0, 1, -1, 13);
50580 let b = _mm_set1_epi32(-1);
50581 let r = _mm_cmpgt_epi32_mask(a, b);
50582 assert_eq!(r, 0b00001101);
50583 }
50584
50585 #[simd_test(enable = "avx512f,avx512vl")]
50586 unsafe fn test_mm_mask_cmpgt_epi32_mask() {
50587 let a = _mm_set_epi32(0, 1, -1, 13);
50588 let b = _mm_set1_epi32(-1);
50589 let mask = 0b11111111;
50590 let r = _mm_mask_cmpgt_epi32_mask(mask, a, b);
50591 assert_eq!(r, 0b00001101);
50592 }
50593
50594 #[simd_test(enable = "avx512f")]
50595 unsafe fn test_mm512_cmple_epi32_mask() {
50596 #[rustfmt::skip]
50597 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50598 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50599 let b = _mm512_set1_epi32(-1);
50600 assert_eq!(
50601 _mm512_cmple_epi32_mask(a, b),
50602 !_mm512_cmpgt_epi32_mask(a, b)
50603 )
50604 }
50605
50606 #[simd_test(enable = "avx512f")]
50607 unsafe fn test_mm512_mask_cmple_epi32_mask() {
50608 #[rustfmt::skip]
50609 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50610 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50611 let b = _mm512_set1_epi32(-1);
50612 let mask = 0b01111010_01111010;
        assert_eq!(_mm512_mask_cmple_epi32_mask(mask, a, b), 0b00110000_00110000);
50614 }
50615
50616 #[simd_test(enable = "avx512f,avx512vl")]
50617 unsafe fn test_mm256_cmple_epi32_mask() {
50618 let a = _mm256_set_epi32(0, 1, -1, 200, i32::MAX, i32::MIN, 100, -100);
50619 let b = _mm256_set1_epi32(-1);
50620 let r = _mm256_cmple_epi32_mask(a, b);
50621 assert_eq!(r, 0b00100101)
50622 }
50623
50624 #[simd_test(enable = "avx512f,avx512vl")]
50625 unsafe fn test_mm256_mask_cmple_epi32_mask() {
50626 let a = _mm256_set_epi32(0, 1, -1, 200, i32::MAX, i32::MIN, 100, -100);
50627 let b = _mm256_set1_epi32(-1);
50628 let mask = 0b11111111;
50629 let r = _mm256_mask_cmple_epi32_mask(mask, a, b);
50630 assert_eq!(r, 0b00100101)
50631 }
50632
50633 #[simd_test(enable = "avx512f,avx512vl")]
50634 unsafe fn test_mm_cmple_epi32_mask() {
50635 let a = _mm_set_epi32(0, 1, -1, 200);
50636 let b = _mm_set1_epi32(-1);
50637 let r = _mm_cmple_epi32_mask(a, b);
50638 assert_eq!(r, 0b00000010)
50639 }
50640
50641 #[simd_test(enable = "avx512f,avx512vl")]
50642 unsafe fn test_mm_mask_cmple_epi32_mask() {
50643 let a = _mm_set_epi32(0, 1, -1, 200);
50644 let b = _mm_set1_epi32(-1);
50645 let mask = 0b11111111;
50646 let r = _mm_mask_cmple_epi32_mask(mask, a, b);
50647 assert_eq!(r, 0b00000010)
50648 }
50649
50650 #[simd_test(enable = "avx512f")]
50651 unsafe fn test_mm512_cmpge_epi32_mask() {
50652 #[rustfmt::skip]
50653 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50654 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50655 let b = _mm512_set1_epi32(-1);
50656 assert_eq!(
50657 _mm512_cmpge_epi32_mask(a, b),
50658 !_mm512_cmplt_epi32_mask(a, b)
50659 )
50660 }
50661
50662 #[simd_test(enable = "avx512f")]
50663 unsafe fn test_mm512_mask_cmpge_epi32_mask() {
50664 #[rustfmt::skip]
50665 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50666 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50667 let b = _mm512_set1_epi32(-1);
50668 let mask = 0b01111010_01111010;
50669 assert_eq!(
50670 _mm512_mask_cmpge_epi32_mask(mask, a, b),
50671 0b01111010_01111010
50672 );
50673 }
50674
50675 #[simd_test(enable = "avx512f,avx512vl")]
50676 unsafe fn test_mm256_cmpge_epi32_mask() {
50677 let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50678 let b = _mm256_set1_epi32(-1);
50679 let r = _mm256_cmpge_epi32_mask(a, b);
50680 assert_eq!(r, 0b11111010)
50681 }
50682
50683 #[simd_test(enable = "avx512f,avx512vl")]
50684 unsafe fn test_mm256_mask_cmpge_epi32_mask() {
50685 let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50686 let b = _mm256_set1_epi32(-1);
50687 let mask = 0b11111111;
50688 let r = _mm256_mask_cmpge_epi32_mask(mask, a, b);
50689 assert_eq!(r, 0b11111010)
50690 }
50691
50692 #[simd_test(enable = "avx512f,avx512vl")]
50693 unsafe fn test_mm_cmpge_epi32_mask() {
50694 let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
50695 let b = _mm_set1_epi32(-1);
50696 let r = _mm_cmpge_epi32_mask(a, b);
50697 assert_eq!(r, 0b00001111)
50698 }
50699
50700 #[simd_test(enable = "avx512f,avx512vl")]
50701 unsafe fn test_mm_mask_cmpge_epi32_mask() {
50702 let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
50703 let b = _mm_set1_epi32(-1);
50704 let mask = 0b11111111;
50705 let r = _mm_mask_cmpge_epi32_mask(mask, a, b);
50706 assert_eq!(r, 0b00001111)
50707 }
50708
50709 #[simd_test(enable = "avx512f")]
50710 unsafe fn test_mm512_cmpeq_epi32_mask() {
50711 #[rustfmt::skip]
50712 let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
50713 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50714 #[rustfmt::skip]
50715 let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
50716 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50717 let m = _mm512_cmpeq_epi32_mask(b, a);
50718 assert_eq!(m, 0b11001111_11001111);
50719 }
50720
50721 #[simd_test(enable = "avx512f")]
50722 unsafe fn test_mm512_mask_cmpeq_epi32_mask() {
50723 #[rustfmt::skip]
50724 let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
50725 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50726 #[rustfmt::skip]
50727 let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
50728 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50729 let mask = 0b01111010_01111010;
50730 let r = _mm512_mask_cmpeq_epi32_mask(mask, b, a);
50731 assert_eq!(r, 0b01001010_01001010);
50732 }
50733
50734 #[simd_test(enable = "avx512f,avx512vl")]
50735 unsafe fn test_mm256_cmpeq_epi32_mask() {
50736 let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50737 let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50738 let m = _mm256_cmpeq_epi32_mask(b, a);
50739 assert_eq!(m, 0b11001111);
50740 }
50741
50742 #[simd_test(enable = "avx512f,avx512vl")]
50743 unsafe fn test_mm256_mask_cmpeq_epi32_mask() {
50744 let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50745 let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50746 let mask = 0b01111010;
50747 let r = _mm256_mask_cmpeq_epi32_mask(mask, b, a);
50748 assert_eq!(r, 0b01001010);
50749 }
50750
50751 #[simd_test(enable = "avx512f,avx512vl")]
50752 unsafe fn test_mm_cmpeq_epi32_mask() {
50753 let a = _mm_set_epi32(0, 1, -1, 13);
50754 let b = _mm_set_epi32(0, 1, 13, 42);
50755 let m = _mm_cmpeq_epi32_mask(b, a);
50756 assert_eq!(m, 0b00001100);
50757 }
50758
50759 #[simd_test(enable = "avx512f,avx512vl")]
50760 unsafe fn test_mm_mask_cmpeq_epi32_mask() {
50761 let a = _mm_set_epi32(0, 1, -1, 13);
50762 let b = _mm_set_epi32(0, 1, 13, 42);
50763 let mask = 0b11111111;
50764 let r = _mm_mask_cmpeq_epi32_mask(mask, b, a);
50765 assert_eq!(r, 0b00001100);
50766 }
50767
50768 #[simd_test(enable = "avx512f")]
50769 unsafe fn test_mm512_cmpneq_epi32_mask() {
50770 #[rustfmt::skip]
50771 let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
50772 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50773 #[rustfmt::skip]
50774 let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
50775 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50776 let m = _mm512_cmpneq_epi32_mask(b, a);
50777 assert_eq!(m, !_mm512_cmpeq_epi32_mask(b, a));
50778 }
50779
50780 #[simd_test(enable = "avx512f")]
50781 unsafe fn test_mm512_mask_cmpneq_epi32_mask() {
50782 #[rustfmt::skip]
50783 let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100,
50784 0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100);
50785 #[rustfmt::skip]
50786 let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
50787 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50788 let mask = 0b01111010_01111010;
50789 let r = _mm512_mask_cmpneq_epi32_mask(mask, b, a);
50790 assert_eq!(r, 0b00110010_00110010)
50791 }
50792
50793 #[simd_test(enable = "avx512f,avx512vl")]
50794 unsafe fn test_mm256_cmpneq_epi32_mask() {
50795 let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50796 let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50797 let m = _mm256_cmpneq_epi32_mask(b, a);
50798 assert_eq!(m, !_mm256_cmpeq_epi32_mask(b, a));
50799 }
50800
50801 #[simd_test(enable = "avx512f,avx512vl")]
50802 unsafe fn test_mm256_mask_cmpneq_epi32_mask() {
50803 let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100);
50804 let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50805 let mask = 0b11111111;
50806 let r = _mm256_mask_cmpneq_epi32_mask(mask, b, a);
50807 assert_eq!(r, 0b00110011)
50808 }
50809
50810 #[simd_test(enable = "avx512f,avx512vl")]
50811 unsafe fn test_mm_cmpneq_epi32_mask() {
50812 let a = _mm_set_epi32(0, 1, -1, 13);
50813 let b = _mm_set_epi32(0, 1, 13, 42);
50814 let r = _mm_cmpneq_epi32_mask(b, a);
50815 assert_eq!(r, 0b00000011)
50816 }
50817
50818 #[simd_test(enable = "avx512f,avx512vl")]
50819 unsafe fn test_mm_mask_cmpneq_epi32_mask() {
50820 let a = _mm_set_epi32(0, 1, -1, 13);
50821 let b = _mm_set_epi32(0, 1, 13, 42);
50822 let mask = 0b11111111;
50823 let r = _mm_mask_cmpneq_epi32_mask(mask, b, a);
50824 assert_eq!(r, 0b00000011)
50825 }
50826
50827 #[simd_test(enable = "avx512f")]
50828 unsafe fn test_mm512_cmp_epi32_mask() {
50829 #[rustfmt::skip]
50830 let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
50831 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50832 let b = _mm512_set1_epi32(-1);
50833 let m = _mm512_cmp_epi32_mask::<_MM_CMPINT_LT>(a, b);
50834 assert_eq!(m, 0b00000101_00000101);
50835 }
50836
50837 #[simd_test(enable = "avx512f")]
50838 unsafe fn test_mm512_mask_cmp_epi32_mask() {
50839 #[rustfmt::skip]
50840 let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
50841 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50842 let b = _mm512_set1_epi32(-1);
50843 let mask = 0b01100110_01100110;
50844 let r = _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(mask, a, b);
50845 assert_eq!(r, 0b00000100_00000100);
50846 }
50847
50848 #[simd_test(enable = "avx512f,avx512vl")]
50849 unsafe fn test_mm256_cmp_epi32_mask() {
50850 let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50851 let b = _mm256_set1_epi32(-1);
50852 let m = _mm256_cmp_epi32_mask::<_MM_CMPINT_LT>(a, b);
50853 assert_eq!(m, 0b00000101);
50854 }
50855
50856 #[simd_test(enable = "avx512f,avx512vl")]
50857 unsafe fn test_mm256_mask_cmp_epi32_mask() {
50858 let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50859 let b = _mm256_set1_epi32(-1);
50860 let mask = 0b01100110;
50861 let r = _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(mask, a, b);
50862 assert_eq!(r, 0b00000100);
50863 }
50864
50865 #[simd_test(enable = "avx512f,avx512vl")]
50866 unsafe fn test_mm_cmp_epi32_mask() {
50867 let a = _mm_set_epi32(0, 1, -1, 13);
50868 let b = _mm_set1_epi32(1);
50869 let m = _mm_cmp_epi32_mask::<_MM_CMPINT_LT>(a, b);
50870 assert_eq!(m, 0b00001010);
50871 }
50872
50873 #[simd_test(enable = "avx512f,avx512vl")]
50874 unsafe fn test_mm_mask_cmp_epi32_mask() {
50875 let a = _mm_set_epi32(0, 1, -1, 13);
50876 let b = _mm_set1_epi32(1);
50877 let mask = 0b11111111;
50878 let r = _mm_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(mask, a, b);
50879 assert_eq!(r, 0b00001010);
50880 }
50881
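    // `_mm512_set_*` takes its arguments from the highest lane down to the lowest,
    // while `_mm512_setr_*` takes them in lowest-to-highest (memory) order; the
    // tests below cross-check the two orders against each other and against `set1`.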
50882 #[simd_test(enable = "avx512f")]
50883 unsafe fn test_mm512_set_epi8() {
50884 let r = _mm512_set1_epi8(2);
50885 assert_eq_m512i(
50886 r,
50887 _mm512_set_epi8(
50888 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
50889 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
50890 2, 2, 2, 2, 2, 2, 2, 2,
50891 ),
50892 )
50893 }
50894
50895 #[simd_test(enable = "avx512f")]
50896 unsafe fn test_mm512_set_epi16() {
50897 let r = _mm512_set1_epi16(2);
50898 assert_eq_m512i(
50899 r,
50900 _mm512_set_epi16(
50901 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
50902 2, 2, 2, 2,
50903 ),
50904 )
50905 }
50906
50907 #[simd_test(enable = "avx512f")]
50908 unsafe fn test_mm512_set_epi32() {
50909 let r = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
50910 assert_eq_m512i(
50911 r,
50912 _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),
50913 )
50914 }
50915
50916 #[simd_test(enable = "avx512f")]
50917 unsafe fn test_mm512_setr_epi32() {
50918 let r = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
50919 assert_eq_m512i(
50920 r,
50921 _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),
50922 )
50923 }
50924
50925 #[simd_test(enable = "avx512f")]
50926 unsafe fn test_mm512_set1_epi8() {
50927 let r = _mm512_set_epi8(
50928 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
50929 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
50930 2, 2, 2, 2, 2, 2,
50931 );
50932 assert_eq_m512i(r, _mm512_set1_epi8(2));
50933 }
50934
50935 #[simd_test(enable = "avx512f")]
50936 unsafe fn test_mm512_set1_epi16() {
50937 let r = _mm512_set_epi16(
50938 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
50939 2, 2, 2,
50940 );
50941 assert_eq_m512i(r, _mm512_set1_epi16(2));
50942 }
50943
50944 #[simd_test(enable = "avx512f")]
50945 unsafe fn test_mm512_set1_epi32() {
50946 let r = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
50947 assert_eq_m512i(r, _mm512_set1_epi32(2));
50948 }
50949
50950 #[simd_test(enable = "avx512f")]
50951 unsafe fn test_mm512_setzero_si512() {
50952 assert_eq_m512i(_mm512_set1_epi32(0), _mm512_setzero_si512());
50953 }
50954
50955 #[simd_test(enable = "avx512f")]
50956 unsafe fn test_mm512_setzero_epi32() {
50957 assert_eq_m512i(_mm512_set1_epi32(0), _mm512_setzero_epi32());
50958 }
50959
50960 #[simd_test(enable = "avx512f")]
50961 unsafe fn test_mm512_set_ps() {
50962 let r = _mm512_setr_ps(
50963 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
50964 );
50965 assert_eq_m512(
50966 r,
50967 _mm512_set_ps(
50968 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
50969 ),
50970 )
50971 }
50972
50973 #[simd_test(enable = "avx512f")]
50974 unsafe fn test_mm512_setr_ps() {
50975 let r = _mm512_set_ps(
50976 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
50977 );
50978 assert_eq_m512(
50979 r,
50980 _mm512_setr_ps(
50981 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
50982 ),
50983 )
50984 }
50985
50986 #[simd_test(enable = "avx512f")]
50987 unsafe fn test_mm512_set1_ps() {
50988 #[rustfmt::skip]
50989 let expected = _mm512_set_ps(2., 2., 2., 2., 2., 2., 2., 2.,
50990 2., 2., 2., 2., 2., 2., 2., 2.);
50991 assert_eq_m512(expected, _mm512_set1_ps(2.));
50992 }
50993
50994 #[simd_test(enable = "avx512f")]
50995 unsafe fn test_mm512_set4_epi32() {
50996 let r = _mm512_set_epi32(4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1);
50997 assert_eq_m512i(r, _mm512_set4_epi32(4, 3, 2, 1));
50998 }
50999
51000 #[simd_test(enable = "avx512f")]
51001 unsafe fn test_mm512_set4_ps() {
51002 let r = _mm512_set_ps(
51003 4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1.,
51004 );
51005 assert_eq_m512(r, _mm512_set4_ps(4., 3., 2., 1.));
51006 }
51007
51008 #[simd_test(enable = "avx512f")]
51009 unsafe fn test_mm512_setr4_epi32() {
51010 let r = _mm512_set_epi32(4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1);
51011 assert_eq_m512i(r, _mm512_setr4_epi32(1, 2, 3, 4));
51012 }
51013
51014 #[simd_test(enable = "avx512f")]
51015 unsafe fn test_mm512_setr4_ps() {
51016 let r = _mm512_set_ps(
51017 4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1.,
51018 );
51019 assert_eq_m512(r, _mm512_setr4_ps(1., 2., 3., 4.));
51020 }
51021
51022 #[simd_test(enable = "avx512f")]
51023 unsafe fn test_mm512_setzero_ps() {
51024 assert_eq_m512(_mm512_setzero_ps(), _mm512_set1_ps(0.));
51025 }
51026
51027 #[simd_test(enable = "avx512f")]
51028 unsafe fn test_mm512_setzero() {
51029 assert_eq_m512(_mm512_setzero(), _mm512_set1_ps(0.));
51030 }
51031
51032 #[simd_test(enable = "avx512f")]
51033 unsafe fn test_mm512_loadu_pd() {
51034 let a = &[4., 3., 2., 5., 8., 9., 64., 50.];
51035 let p = a.as_ptr();
51036 let r = _mm512_loadu_pd(black_box(p));
51037 let e = _mm512_setr_pd(4., 3., 2., 5., 8., 9., 64., 50.);
51038 assert_eq_m512d(r, e);
51039 }
51040
51041 #[simd_test(enable = "avx512f")]
51042 unsafe fn test_mm512_storeu_pd() {
51043 let a = _mm512_set1_pd(9.);
51044 let mut r = _mm512_undefined_pd();
51045 _mm512_storeu_pd(&mut r as *mut _ as *mut f64, a);
51046 assert_eq_m512d(r, a);
51047 }
51048
51049 #[simd_test(enable = "avx512f")]
51050 unsafe fn test_mm512_loadu_ps() {
51051 let a = &[
51052 4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
51053 ];
51054 let p = a.as_ptr();
51055 let r = _mm512_loadu_ps(black_box(p));
51056 let e = _mm512_setr_ps(
51057 4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
51058 );
51059 assert_eq_m512(r, e);
51060 }
51061
51062 #[simd_test(enable = "avx512f")]
51063 unsafe fn test_mm512_storeu_ps() {
51064 let a = _mm512_set1_ps(9.);
51065 let mut r = _mm512_undefined_ps();
51066 _mm512_storeu_ps(&mut r as *mut _ as *mut f32, a);
51067 assert_eq_m512(r, a);
51068 }
51069
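    // For the masked load/store tests below: mask bit 0 governs the lowest element.
    // A clear bit keeps the corresponding `src` element (mask variants), yields zero
    // (maskz variants), or leaves the destination memory untouched (masked stores).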
51070 #[simd_test(enable = "avx512f")]
51071 unsafe fn test_mm512_mask_loadu_epi32() {
51072 let src = _mm512_set1_epi32(42);
51073 let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
51074 let p = a.as_ptr();
51075 let m = 0b11101000_11001010;
51076 let r = _mm512_mask_loadu_epi32(src, m, black_box(p));
51077 let e = _mm512_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16);
51078 assert_eq_m512i(r, e);
51079 }
51080
51081 #[simd_test(enable = "avx512f")]
51082 unsafe fn test_mm512_maskz_loadu_epi32() {
51083 let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
51084 let p = a.as_ptr();
51085 let m = 0b11101000_11001010;
51086 let r = _mm512_maskz_loadu_epi32(m, black_box(p));
51087 let e = _mm512_setr_epi32(0, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16);
51088 assert_eq_m512i(r, e);
51089 }
51090
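    // The non-`u` (aligned) load/store intrinsics require the pointer to be aligned
    // to the full vector width, hence the `#[repr(align(64))]` wrapper structs below
    // (align(32)/align(16) for the 256-bit and 128-bit variants).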
51091 #[simd_test(enable = "avx512f")]
51092 unsafe fn test_mm512_mask_load_epi32() {
51093 #[repr(align(64))]
51094 struct Align {
51095 data: [i32; 16], // 64 bytes
51096 }
51097 let src = _mm512_set1_epi32(42);
51098 let a = Align {
51099 data: [1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
51100 };
51101 let p = a.data.as_ptr();
51102 let m = 0b11101000_11001010;
51103 let r = _mm512_mask_load_epi32(src, m, black_box(p));
51104 let e = _mm512_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16);
51105 assert_eq_m512i(r, e);
51106 }
51107
51108 #[simd_test(enable = "avx512f")]
51109 unsafe fn test_mm512_maskz_load_epi32() {
51110 #[repr(align(64))]
51111 struct Align {
51112 data: [i32; 16], // 64 bytes
51113 }
51114 let a = Align {
51115 data: [1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
51116 };
51117 let p = a.data.as_ptr();
51118 let m = 0b11101000_11001010;
51119 let r = _mm512_maskz_load_epi32(m, black_box(p));
51120 let e = _mm512_setr_epi32(0, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16);
51121 assert_eq_m512i(r, e);
51122 }
51123
51124 #[simd_test(enable = "avx512f")]
51125 unsafe fn test_mm512_mask_storeu_epi32() {
51126 let mut r = [42_i32; 16];
51127 let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
51128 let m = 0b11101000_11001010;
51129 _mm512_mask_storeu_epi32(r.as_mut_ptr(), m, a);
51130 let e = _mm512_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16);
51131 assert_eq_m512i(_mm512_loadu_epi32(r.as_ptr()), e);
51132 }
51133
51134 #[simd_test(enable = "avx512f")]
51135 unsafe fn test_mm512_mask_store_epi32() {
51136 #[repr(align(64))]
51137 struct Align {
51138 data: [i32; 16],
51139 }
51140 let mut r = Align { data: [42; 16] };
51141 let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
51142 let m = 0b11101000_11001010;
51143 _mm512_mask_store_epi32(r.data.as_mut_ptr(), m, a);
51144 let e = _mm512_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16);
51145 assert_eq_m512i(_mm512_load_epi32(r.data.as_ptr()), e);
51146 }
51147
51148 #[simd_test(enable = "avx512f")]
51149 unsafe fn test_mm512_mask_loadu_epi64() {
51150 let src = _mm512_set1_epi64(42);
51151 let a = &[1_i64, 2, 3, 4, 5, 6, 7, 8];
51152 let p = a.as_ptr();
51153 let m = 0b11001010;
51154 let r = _mm512_mask_loadu_epi64(src, m, black_box(p));
51155 let e = _mm512_setr_epi64(42, 2, 42, 4, 42, 42, 7, 8);
51156 assert_eq_m512i(r, e);
51157 }
51158
51159 #[simd_test(enable = "avx512f")]
51160 unsafe fn test_mm512_maskz_loadu_epi64() {
51161 let a = &[1_i64, 2, 3, 4, 5, 6, 7, 8];
51162 let p = a.as_ptr();
51163 let m = 0b11001010;
51164 let r = _mm512_maskz_loadu_epi64(m, black_box(p));
51165 let e = _mm512_setr_epi64(0, 2, 0, 4, 0, 0, 7, 8);
51166 assert_eq_m512i(r, e);
51167 }
51168
51169 #[simd_test(enable = "avx512f")]
51170 unsafe fn test_mm512_mask_load_epi64() {
51171 #[repr(align(64))]
51172 struct Align {
51173 data: [i64; 8], // 64 bytes
51174 }
51175 let src = _mm512_set1_epi64(42);
51176 let a = Align {
51177 data: [1_i64, 2, 3, 4, 5, 6, 7, 8],
51178 };
51179 let p = a.data.as_ptr();
51180 let m = 0b11001010;
51181 let r = _mm512_mask_load_epi64(src, m, black_box(p));
51182 let e = _mm512_setr_epi64(42, 2, 42, 4, 42, 42, 7, 8);
51183 assert_eq_m512i(r, e);
51184 }
51185
51186 #[simd_test(enable = "avx512f")]
51187 unsafe fn test_mm512_maskz_load_epi64() {
51188 #[repr(align(64))]
51189 struct Align {
51190 data: [i64; 8], // 64 bytes
51191 }
51192 let a = Align {
51193 data: [1_i64, 2, 3, 4, 5, 6, 7, 8],
51194 };
51195 let p = a.data.as_ptr();
51196 let m = 0b11001010;
51197 let r = _mm512_maskz_load_epi64(m, black_box(p));
51198 let e = _mm512_setr_epi64(0, 2, 0, 4, 0, 0, 7, 8);
51199 assert_eq_m512i(r, e);
51200 }
51201
51202 #[simd_test(enable = "avx512f")]
51203 unsafe fn test_mm512_mask_storeu_epi64() {
51204 let mut r = [42_i64; 8];
51205 let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
51206 let m = 0b11001010;
51207 _mm512_mask_storeu_epi64(r.as_mut_ptr(), m, a);
51208 let e = _mm512_setr_epi64(42, 2, 42, 4, 42, 42, 7, 8);
51209 assert_eq_m512i(_mm512_loadu_epi64(r.as_ptr()), e);
51210 }
51211
51212 #[simd_test(enable = "avx512f")]
51213 unsafe fn test_mm512_mask_store_epi64() {
51214 #[repr(align(64))]
51215 struct Align {
51216 data: [i64; 8],
51217 }
51218 let mut r = Align { data: [42; 8] };
51219 let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
51220 let m = 0b11001010;
51221 let p = r.data.as_mut_ptr();
51222 _mm512_mask_store_epi64(p, m, a);
51223 let e = _mm512_setr_epi64(42, 2, 42, 4, 42, 42, 7, 8);
51224 assert_eq_m512i(_mm512_load_epi64(r.data.as_ptr()), e);
51225 }
51226
51227 #[simd_test(enable = "avx512f")]
51228 unsafe fn test_mm512_mask_loadu_ps() {
51229 let src = _mm512_set1_ps(42.0);
51230 let a = &[
51231 1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
51232 16.0,
51233 ];
51234 let p = a.as_ptr();
51235 let m = 0b11101000_11001010;
51236 let r = _mm512_mask_loadu_ps(src, m, black_box(p));
51237 let e = _mm512_setr_ps(
51238 42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0, 42.0, 42.0, 42.0, 12.0, 42.0, 14.0, 15.0,
51239 16.0,
51240 );
51241 assert_eq_m512(r, e);
51242 }
51243
51244 #[simd_test(enable = "avx512f")]
51245 unsafe fn test_mm512_maskz_loadu_ps() {
51246 let a = &[
51247 1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
51248 16.0,
51249 ];
51250 let p = a.as_ptr();
51251 let m = 0b11101000_11001010;
51252 let r = _mm512_maskz_loadu_ps(m, black_box(p));
51253 let e = _mm512_setr_ps(
51254 0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0, 0.0, 0.0, 0.0, 12.0, 0.0, 14.0, 15.0, 16.0,
51255 );
51256 assert_eq_m512(r, e);
51257 }
51258
51259 #[simd_test(enable = "avx512f")]
51260 unsafe fn test_mm512_mask_load_ps() {
51261 #[repr(align(64))]
51262 struct Align {
51263 data: [f32; 16], // 64 bytes
51264 }
51265 let src = _mm512_set1_ps(42.0);
51266 let a = Align {
51267 data: [
51268 1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0,
51269 15.0, 16.0,
51270 ],
51271 };
51272 let p = a.data.as_ptr();
51273 let m = 0b11101000_11001010;
51274 let r = _mm512_mask_load_ps(src, m, black_box(p));
51275 let e = _mm512_setr_ps(
51276 42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0, 42.0, 42.0, 42.0, 12.0, 42.0, 14.0, 15.0,
51277 16.0,
51278 );
51279 assert_eq_m512(r, e);
51280 }
51281
51282 #[simd_test(enable = "avx512f")]
51283 unsafe fn test_mm512_maskz_load_ps() {
51284 #[repr(align(64))]
51285 struct Align {
51286 data: [f32; 16], // 64 bytes
51287 }
51288 let a = Align {
51289 data: [
51290 1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0,
51291 15.0, 16.0,
51292 ],
51293 };
51294 let p = a.data.as_ptr();
51295 let m = 0b11101000_11001010;
51296 let r = _mm512_maskz_load_ps(m, black_box(p));
51297 let e = _mm512_setr_ps(
51298 0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0, 0.0, 0.0, 0.0, 12.0, 0.0, 14.0, 15.0, 16.0,
51299 );
51300 assert_eq_m512(r, e);
51301 }
51302
51303 #[simd_test(enable = "avx512f")]
51304 unsafe fn test_mm512_mask_storeu_ps() {
51305 let mut r = [42_f32; 16];
51306 let a = _mm512_setr_ps(
51307 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
51308 );
51309 let m = 0b11101000_11001010;
51310 _mm512_mask_storeu_ps(r.as_mut_ptr(), m, a);
51311 let e = _mm512_setr_ps(
51312 42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0, 42.0, 42.0, 42.0, 12.0, 42.0, 14.0, 15.0,
51313 16.0,
51314 );
51315 assert_eq_m512(_mm512_loadu_ps(r.as_ptr()), e);
51316 }
51317
51318 #[simd_test(enable = "avx512f")]
51319 unsafe fn test_mm512_mask_store_ps() {
51320 #[repr(align(64))]
51321 struct Align {
51322 data: [f32; 16],
51323 }
51324 let mut r = Align { data: [42.0; 16] };
51325 let a = _mm512_setr_ps(
51326 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
51327 );
51328 let m = 0b11101000_11001010;
51329 _mm512_mask_store_ps(r.data.as_mut_ptr(), m, a);
51330 let e = _mm512_setr_ps(
51331 42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0, 42.0, 42.0, 42.0, 12.0, 42.0, 14.0, 15.0,
51332 16.0,
51333 );
51334 assert_eq_m512(_mm512_load_ps(r.data.as_ptr()), e);
51335 }
51336
51337 #[simd_test(enable = "avx512f")]
51338 unsafe fn test_mm512_mask_loadu_pd() {
51339 let src = _mm512_set1_pd(42.0);
51340 let a = &[1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
51341 let p = a.as_ptr();
51342 let m = 0b11001010;
51343 let r = _mm512_mask_loadu_pd(src, m, black_box(p));
51344 let e = _mm512_setr_pd(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
51345 assert_eq_m512d(r, e);
51346 }
51347
51348 #[simd_test(enable = "avx512f")]
51349 unsafe fn test_mm512_maskz_loadu_pd() {
51350 let a = &[1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
51351 let p = a.as_ptr();
51352 let m = 0b11001010;
51353 let r = _mm512_maskz_loadu_pd(m, black_box(p));
51354 let e = _mm512_setr_pd(0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0);
51355 assert_eq_m512d(r, e);
51356 }
51357
51358 #[simd_test(enable = "avx512f")]
51359 unsafe fn test_mm512_mask_load_pd() {
51360 #[repr(align(64))]
51361 struct Align {
51362 data: [f64; 8], // 64 bytes
51363 }
51364 let src = _mm512_set1_pd(42.0);
51365 let a = Align {
51366 data: [1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
51367 };
51368 let p = a.data.as_ptr();
51369 let m = 0b11001010;
51370 let r = _mm512_mask_load_pd(src, m, black_box(p));
51371 let e = _mm512_setr_pd(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
51372 assert_eq_m512d(r, e);
51373 }
51374
51375 #[simd_test(enable = "avx512f")]
51376 unsafe fn test_mm512_maskz_load_pd() {
51377 #[repr(align(64))]
51378 struct Align {
51379 data: [f64; 8], // 64 bytes
51380 }
51381 let a = Align {
51382 data: [1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
51383 };
51384 let p = a.data.as_ptr();
51385 let m = 0b11001010;
51386 let r = _mm512_maskz_load_pd(m, black_box(p));
51387 let e = _mm512_setr_pd(0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0);
51388 assert_eq_m512d(r, e);
51389 }
51390
51391 #[simd_test(enable = "avx512f")]
51392 unsafe fn test_mm512_mask_storeu_pd() {
51393 let mut r = [42_f64; 8];
51394 let a = _mm512_setr_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
51395 let m = 0b11001010;
51396 _mm512_mask_storeu_pd(r.as_mut_ptr(), m, a);
51397 let e = _mm512_setr_pd(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
51398 assert_eq_m512d(_mm512_loadu_pd(r.as_ptr()), e);
51399 }
51400
51401 #[simd_test(enable = "avx512f")]
51402 unsafe fn test_mm512_mask_store_pd() {
51403 #[repr(align(64))]
51404 struct Align {
51405 data: [f64; 8],
51406 }
51407 let mut r = Align { data: [42.0; 8] };
51408 let a = _mm512_setr_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
51409 let m = 0b11001010;
51410 _mm512_mask_store_pd(r.data.as_mut_ptr(), m, a);
51411 let e = _mm512_setr_pd(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
51412 assert_eq_m512d(_mm512_load_pd(r.data.as_ptr()), e);
51413 }
51414
51415 #[simd_test(enable = "avx512f,avx512vl")]
51416 unsafe fn test_mm256_mask_loadu_epi32() {
51417 let src = _mm256_set1_epi32(42);
51418 let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8];
51419 let p = a.as_ptr();
51420 let m = 0b11001010;
51421 let r = _mm256_mask_loadu_epi32(src, m, black_box(p));
51422 let e = _mm256_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8);
51423 assert_eq_m256i(r, e);
51424 }
51425
51426 #[simd_test(enable = "avx512f,avx512vl")]
51427 unsafe fn test_mm256_maskz_loadu_epi32() {
51428 let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8];
51429 let p = a.as_ptr();
51430 let m = 0b11001010;
51431 let r = _mm256_maskz_loadu_epi32(m, black_box(p));
51432 let e = _mm256_setr_epi32(0, 2, 0, 4, 0, 0, 7, 8);
51433 assert_eq_m256i(r, e);
51434 }
51435
51436 #[simd_test(enable = "avx512f,avx512vl")]
51437 unsafe fn test_mm256_mask_load_epi32() {
51438 #[repr(align(32))]
51439 struct Align {
51440 data: [i32; 8], // 32 bytes
51441 }
51442 let src = _mm256_set1_epi32(42);
51443 let a = Align {
51444 data: [1_i32, 2, 3, 4, 5, 6, 7, 8],
51445 };
51446 let p = a.data.as_ptr();
51447 let m = 0b11001010;
51448 let r = _mm256_mask_load_epi32(src, m, black_box(p));
51449 let e = _mm256_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8);
51450 assert_eq_m256i(r, e);
51451 }
51452
51453 #[simd_test(enable = "avx512f,avx512vl")]
51454 unsafe fn test_mm256_maskz_load_epi32() {
51455 #[repr(align(32))]
51456 struct Align {
51457 data: [i32; 8], // 32 bytes
51458 }
51459 let a = Align {
51460 data: [1_i32, 2, 3, 4, 5, 6, 7, 8],
51461 };
51462 let p = a.data.as_ptr();
51463 let m = 0b11001010;
51464 let r = _mm256_maskz_load_epi32(m, black_box(p));
51465 let e = _mm256_setr_epi32(0, 2, 0, 4, 0, 0, 7, 8);
51466 assert_eq_m256i(r, e);
51467 }
51468
51469 #[simd_test(enable = "avx512f,avx512vl")]
51470 unsafe fn test_mm256_mask_storeu_epi32() {
51471 let mut r = [42_i32; 8];
51472 let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
51473 let m = 0b11001010;
51474 _mm256_mask_storeu_epi32(r.as_mut_ptr(), m, a);
51475 let e = _mm256_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8);
51476 assert_eq_m256i(_mm256_loadu_epi32(r.as_ptr()), e);
51477 }
51478
51479 #[simd_test(enable = "avx512f,avx512vl")]
51480 unsafe fn test_mm256_mask_store_epi32() {
51481 #[repr(align(64))]
51482 struct Align {
51483 data: [i32; 8],
51484 }
51485 let mut r = Align { data: [42; 8] };
51486 let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
51487 let m = 0b11001010;
51488 _mm256_mask_store_epi32(r.data.as_mut_ptr(), m, a);
51489 let e = _mm256_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8);
51490 assert_eq_m256i(_mm256_load_epi32(r.data.as_ptr()), e);
51491 }
51492
51493 #[simd_test(enable = "avx512f,avx512vl")]
51494 unsafe fn test_mm256_mask_loadu_epi64() {
51495 let src = _mm256_set1_epi64x(42);
51496 let a = &[1_i64, 2, 3, 4];
51497 let p = a.as_ptr();
51498 let m = 0b1010;
51499 let r = _mm256_mask_loadu_epi64(src, m, black_box(p));
51500 let e = _mm256_setr_epi64x(42, 2, 42, 4);
51501 assert_eq_m256i(r, e);
51502 }
51503
51504 #[simd_test(enable = "avx512f,avx512vl")]
51505 unsafe fn test_mm256_maskz_loadu_epi64() {
51506 let a = &[1_i64, 2, 3, 4];
51507 let p = a.as_ptr();
51508 let m = 0b1010;
51509 let r = _mm256_maskz_loadu_epi64(m, black_box(p));
51510 let e = _mm256_setr_epi64x(0, 2, 0, 4);
51511 assert_eq_m256i(r, e);
51512 }
51513
51514 #[simd_test(enable = "avx512f,avx512vl")]
51515 unsafe fn test_mm256_mask_load_epi64() {
51516 #[repr(align(32))]
51517 struct Align {
51518 data: [i64; 4], // 32 bytes
51519 }
51520 let src = _mm256_set1_epi64x(42);
51521 let a = Align {
51522 data: [1_i64, 2, 3, 4],
51523 };
51524 let p = a.data.as_ptr();
51525 let m = 0b1010;
51526 let r = _mm256_mask_load_epi64(src, m, black_box(p));
51527 let e = _mm256_setr_epi64x(42, 2, 42, 4);
51528 assert_eq_m256i(r, e);
51529 }
51530
51531 #[simd_test(enable = "avx512f,avx512vl")]
51532 unsafe fn test_mm256_maskz_load_epi64() {
51533 #[repr(align(32))]
51534 struct Align {
51535 data: [i64; 4], // 32 bytes
51536 }
51537 let a = Align {
51538 data: [1_i64, 2, 3, 4],
51539 };
51540 let p = a.data.as_ptr();
51541 let m = 0b1010;
51542 let r = _mm256_maskz_load_epi64(m, black_box(p));
51543 let e = _mm256_setr_epi64x(0, 2, 0, 4);
51544 assert_eq_m256i(r, e);
51545 }
51546
51547 #[simd_test(enable = "avx512f,avx512vl")]
51548 unsafe fn test_mm256_mask_storeu_epi64() {
51549 let mut r = [42_i64; 4];
51550 let a = _mm256_setr_epi64x(1, 2, 3, 4);
51551 let m = 0b1010;
51552 _mm256_mask_storeu_epi64(r.as_mut_ptr(), m, a);
51553 let e = _mm256_setr_epi64x(42, 2, 42, 4);
51554 assert_eq_m256i(_mm256_loadu_epi64(r.as_ptr()), e);
51555 }
51556
51557 #[simd_test(enable = "avx512f,avx512vl")]
51558 unsafe fn test_mm256_mask_store_epi64() {
51559 #[repr(align(32))]
51560 struct Align {
51561 data: [i64; 4],
51562 }
51563 let mut r = Align { data: [42; 4] };
51564 let a = _mm256_setr_epi64x(1, 2, 3, 4);
51565 let m = 0b1010;
51566 _mm256_mask_store_epi64(r.data.as_mut_ptr(), m, a);
51567 let e = _mm256_setr_epi64x(42, 2, 42, 4);
51568 assert_eq_m256i(_mm256_load_epi64(r.data.as_ptr()), e);
51569 }
51570
51571 #[simd_test(enable = "avx512f,avx512vl")]
51572 unsafe fn test_mm256_mask_loadu_ps() {
51573 let src = _mm256_set1_ps(42.0);
51574 let a = &[1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
51575 let p = a.as_ptr();
51576 let m = 0b11001010;
51577 let r = _mm256_mask_loadu_ps(src, m, black_box(p));
51578 let e = _mm256_setr_ps(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
51579 assert_eq_m256(r, e);
51580 }
51581
51582 #[simd_test(enable = "avx512f,avx512vl")]
51583 unsafe fn test_mm256_maskz_loadu_ps() {
51584 let a = &[1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
51585 let p = a.as_ptr();
51586 let m = 0b11001010;
51587 let r = _mm256_maskz_loadu_ps(m, black_box(p));
51588 let e = _mm256_setr_ps(0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0);
51589 assert_eq_m256(r, e);
51590 }
51591
51592 #[simd_test(enable = "avx512f,avx512vl")]
51593 unsafe fn test_mm256_mask_load_ps() {
51594 #[repr(align(32))]
51595 struct Align {
51596 data: [f32; 8], // 32 bytes
51597 }
51598 let src = _mm256_set1_ps(42.0);
51599 let a = Align {
51600 data: [1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
51601 };
51602 let p = a.data.as_ptr();
51603 let m = 0b11001010;
51604 let r = _mm256_mask_load_ps(src, m, black_box(p));
51605 let e = _mm256_setr_ps(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
51606 assert_eq_m256(r, e);
51607 }
51608
51609 #[simd_test(enable = "avx512f,avx512vl")]
51610 unsafe fn test_mm256_maskz_load_ps() {
51611 #[repr(align(32))]
51612 struct Align {
51613 data: [f32; 8], // 32 bytes
51614 }
51615 let a = Align {
51616 data: [1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
51617 };
51618 let p = a.data.as_ptr();
51619 let m = 0b11001010;
51620 let r = _mm256_maskz_load_ps(m, black_box(p));
51621 let e = _mm256_setr_ps(0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0);
51622 assert_eq_m256(r, e);
51623 }
51624
51625 #[simd_test(enable = "avx512f,avx512vl")]
51626 unsafe fn test_mm256_mask_storeu_ps() {
51627 let mut r = [42_f32; 8];
51628 let a = _mm256_setr_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
51629 let m = 0b11001010;
51630 _mm256_mask_storeu_ps(r.as_mut_ptr(), m, a);
51631 let e = _mm256_setr_ps(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
51632 assert_eq_m256(_mm256_loadu_ps(r.as_ptr()), e);
51633 }
51634
51635 #[simd_test(enable = "avx512f,avx512vl")]
51636 unsafe fn test_mm256_mask_store_ps() {
51637 #[repr(align(32))]
51638 struct Align {
51639 data: [f32; 8],
51640 }
51641 let mut r = Align { data: [42.0; 8] };
51642 let a = _mm256_setr_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
51643 let m = 0b11001010;
51644 _mm256_mask_store_ps(r.data.as_mut_ptr(), m, a);
51645 let e = _mm256_setr_ps(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
51646 assert_eq_m256(_mm256_load_ps(r.data.as_ptr()), e);
51647 }
51648
51649 #[simd_test(enable = "avx512f,avx512vl")]
51650 unsafe fn test_mm256_mask_loadu_pd() {
51651 let src = _mm256_set1_pd(42.0);
51652 let a = &[1.0_f64, 2.0, 3.0, 4.0];
51653 let p = a.as_ptr();
51654 let m = 0b1010;
51655 let r = _mm256_mask_loadu_pd(src, m, black_box(p));
51656 let e = _mm256_setr_pd(42.0, 2.0, 42.0, 4.0);
51657 assert_eq_m256d(r, e);
51658 }
51659
51660 #[simd_test(enable = "avx512f,avx512vl")]
51661 unsafe fn test_mm256_maskz_loadu_pd() {
51662 let a = &[1.0_f64, 2.0, 3.0, 4.0];
51663 let p = a.as_ptr();
51664 let m = 0b1010;
51665 let r = _mm256_maskz_loadu_pd(m, black_box(p));
51666 let e = _mm256_setr_pd(0.0, 2.0, 0.0, 4.0);
51667 assert_eq_m256d(r, e);
51668 }
51669
51670 #[simd_test(enable = "avx512f,avx512vl")]
51671 unsafe fn test_mm256_mask_load_pd() {
51672 #[repr(align(32))]
51673 struct Align {
51674 data: [f64; 4], // 32 bytes
51675 }
51676 let src = _mm256_set1_pd(42.0);
51677 let a = Align {
51678 data: [1.0_f64, 2.0, 3.0, 4.0],
51679 };
51680 let p = a.data.as_ptr();
51681 let m = 0b1010;
51682 let r = _mm256_mask_load_pd(src, m, black_box(p));
51683 let e = _mm256_setr_pd(42.0, 2.0, 42.0, 4.0);
51684 assert_eq_m256d(r, e);
51685 }
51686
51687 #[simd_test(enable = "avx512f,avx512vl")]
51688 unsafe fn test_mm256_maskz_load_pd() {
51689 #[repr(align(32))]
51690 struct Align {
51691 data: [f64; 4], // 32 bytes
51692 }
51693 let a = Align {
51694 data: [1.0_f64, 2.0, 3.0, 4.0],
51695 };
51696 let p = a.data.as_ptr();
51697 let m = 0b1010;
51698 let r = _mm256_maskz_load_pd(m, black_box(p));
51699 let e = _mm256_setr_pd(0.0, 2.0, 0.0, 4.0);
51700 assert_eq_m256d(r, e);
51701 }
51702
51703 #[simd_test(enable = "avx512f,avx512vl")]
51704 unsafe fn test_mm256_mask_storeu_pd() {
51705 let mut r = [42_f64; 4];
51706 let a = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);
51707 let m = 0b1010;
51708 _mm256_mask_storeu_pd(r.as_mut_ptr(), m, a);
51709 let e = _mm256_setr_pd(42.0, 2.0, 42.0, 4.0);
51710 assert_eq_m256d(_mm256_loadu_pd(r.as_ptr()), e);
51711 }
51712
51713 #[simd_test(enable = "avx512f,avx512vl")]
51714 unsafe fn test_mm256_mask_store_pd() {
51715 #[repr(align(32))]
51716 struct Align {
51717 data: [f64; 4],
51718 }
51719 let mut r = Align { data: [42.0; 4] };
51720 let a = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);
51721 let m = 0b1010;
51722 _mm256_mask_store_pd(r.data.as_mut_ptr(), m, a);
51723 let e = _mm256_setr_pd(42.0, 2.0, 42.0, 4.0);
51724 assert_eq_m256d(_mm256_load_pd(r.data.as_ptr()), e);
51725 }
51726
51727 #[simd_test(enable = "avx512f,avx512vl")]
51728 unsafe fn test_mm_mask_loadu_epi32() {
51729 let src = _mm_set1_epi32(42);
51730 let a = &[1_i32, 2, 3, 4];
51731 let p = a.as_ptr();
51732 let m = 0b1010;
51733 let r = _mm_mask_loadu_epi32(src, m, black_box(p));
51734 let e = _mm_setr_epi32(42, 2, 42, 4);
51735 assert_eq_m128i(r, e);
51736 }
51737
51738 #[simd_test(enable = "avx512f,avx512vl")]
51739 unsafe fn test_mm_maskz_loadu_epi32() {
51740 let a = &[1_i32, 2, 3, 4];
51741 let p = a.as_ptr();
51742 let m = 0b1010;
51743 let r = _mm_maskz_loadu_epi32(m, black_box(p));
51744 let e = _mm_setr_epi32(0, 2, 0, 4);
51745 assert_eq_m128i(r, e);
51746 }
51747
51748 #[simd_test(enable = "avx512f,avx512vl")]
51749 unsafe fn test_mm_mask_load_epi32() {
51750 #[repr(align(16))]
51751 struct Align {
51752 data: [i32; 4], // 16 bytes
51753 }
51754 let src = _mm_set1_epi32(42);
51755 let a = Align {
51756 data: [1_i32, 2, 3, 4],
51757 };
51758 let p = a.data.as_ptr();
51759 let m = 0b1010;
51760 let r = _mm_mask_load_epi32(src, m, black_box(p));
51761 let e = _mm_setr_epi32(42, 2, 42, 4);
51762 assert_eq_m128i(r, e);
51763 }
51764
51765 #[simd_test(enable = "avx512f,avx512vl")]
51766 unsafe fn test_mm_maskz_load_epi32() {
51767 #[repr(align(16))]
51768 struct Align {
51769 data: [i32; 4], // 16 bytes
51770 }
51771 let a = Align {
51772 data: [1_i32, 2, 3, 4],
51773 };
51774 let p = a.data.as_ptr();
51775 let m = 0b1010;
51776 let r = _mm_maskz_load_epi32(m, black_box(p));
51777 let e = _mm_setr_epi32(0, 2, 0, 4);
51778 assert_eq_m128i(r, e);
51779 }
51780
51781 #[simd_test(enable = "avx512f,avx512vl")]
51782 unsafe fn test_mm_mask_storeu_epi32() {
51783 let mut r = [42_i32; 4];
51784 let a = _mm_setr_epi32(1, 2, 3, 4);
51785 let m = 0b1010;
51786 _mm_mask_storeu_epi32(r.as_mut_ptr(), m, a);
51787 let e = _mm_setr_epi32(42, 2, 42, 4);
51788 assert_eq_m128i(_mm_loadu_epi32(r.as_ptr()), e);
51789 }
51790
51791 #[simd_test(enable = "avx512f,avx512vl")]
51792 unsafe fn test_mm_mask_store_epi32() {
51793 #[repr(align(16))]
51794 struct Align {
51795 data: [i32; 4], // 16 bytes
51796 }
51797 let mut r = Align { data: [42; 4] };
51798 let a = _mm_setr_epi32(1, 2, 3, 4);
51799 let m = 0b1010;
51800 _mm_mask_store_epi32(r.data.as_mut_ptr(), m, a);
51801 let e = _mm_setr_epi32(42, 2, 42, 4);
51802 assert_eq_m128i(_mm_load_epi32(r.data.as_ptr()), e);
51803 }
51804
51805 #[simd_test(enable = "avx512f,avx512vl")]
51806 unsafe fn test_mm_mask_loadu_epi64() {
51807 let src = _mm_set1_epi64x(42);
51808 let a = &[1_i64, 2];
51809 let p = a.as_ptr();
51810 let m = 0b10;
51811 let r = _mm_mask_loadu_epi64(src, m, black_box(p));
51812 let e = _mm_setr_epi64x(42, 2);
51813 assert_eq_m128i(r, e);
51814 }
51815
51816 #[simd_test(enable = "avx512f,avx512vl")]
51817 unsafe fn test_mm_maskz_loadu_epi64() {
51818 let a = &[1_i64, 2];
51819 let p = a.as_ptr();
51820 let m = 0b10;
51821 let r = _mm_maskz_loadu_epi64(m, black_box(p));
51822 let e = _mm_setr_epi64x(0, 2);
51823 assert_eq_m128i(r, e);
51824 }
51825
51826 #[simd_test(enable = "avx512f,avx512vl")]
51827 unsafe fn test_mm_mask_load_epi64() {
51828 #[repr(align(16))]
51829 struct Align {
51830 data: [i64; 2], // 16 bytes
51831 }
51832 let src = _mm_set1_epi64x(42);
51833 let a = Align { data: [1_i64, 2] };
51834 let p = a.data.as_ptr();
51835 let m = 0b10;
51836 let r = _mm_mask_load_epi64(src, m, black_box(p));
51837 let e = _mm_setr_epi64x(42, 2);
51838 assert_eq_m128i(r, e);
51839 }
51840
51841 #[simd_test(enable = "avx512f,avx512vl")]
51842 unsafe fn test_mm_maskz_load_epi64() {
51843 #[repr(align(16))]
51844 struct Align {
51845 data: [i64; 2], // 16 bytes
51846 }
51847 let a = Align { data: [1_i64, 2] };
51848 let p = a.data.as_ptr();
51849 let m = 0b10;
51850 let r = _mm_maskz_load_epi64(m, black_box(p));
51851 let e = _mm_setr_epi64x(0, 2);
51852 assert_eq_m128i(r, e);
51853 }
51854
51855 #[simd_test(enable = "avx512f,avx512vl")]
51856 unsafe fn test_mm_mask_storeu_epi64() {
51857 let mut r = [42_i64; 2];
51858 let a = _mm_setr_epi64x(1, 2);
51859 let m = 0b10;
51860 _mm_mask_storeu_epi64(r.as_mut_ptr(), m, a);
51861 let e = _mm_setr_epi64x(42, 2);
51862 assert_eq_m128i(_mm_loadu_epi64(r.as_ptr()), e);
51863 }
51864
51865 #[simd_test(enable = "avx512f,avx512vl")]
51866 unsafe fn test_mm_mask_store_epi64() {
51867 #[repr(align(16))]
51868 struct Align {
51869 data: [i64; 2], // 16 bytes
51870 }
51871 let mut r = Align { data: [42; 2] };
51872 let a = _mm_setr_epi64x(1, 2);
51873 let m = 0b10;
51874 _mm_mask_store_epi64(r.data.as_mut_ptr(), m, a);
51875 let e = _mm_setr_epi64x(42, 2);
51876 assert_eq_m128i(_mm_load_epi64(r.data.as_ptr()), e);
51877 }
51878
51879 #[simd_test(enable = "avx512f,avx512vl")]
51880 unsafe fn test_mm_mask_loadu_ps() {
51881 let src = _mm_set1_ps(42.0);
51882 let a = &[1.0_f32, 2.0, 3.0, 4.0];
51883 let p = a.as_ptr();
51884 let m = 0b1010;
51885 let r = _mm_mask_loadu_ps(src, m, black_box(p));
51886 let e = _mm_setr_ps(42.0, 2.0, 42.0, 4.0);
51887 assert_eq_m128(r, e);
51888 }
51889
51890 #[simd_test(enable = "avx512f,avx512vl")]
51891 unsafe fn test_mm_maskz_loadu_ps() {
51892 let a = &[1.0_f32, 2.0, 3.0, 4.0];
51893 let p = a.as_ptr();
51894 let m = 0b1010;
51895 let r = _mm_maskz_loadu_ps(m, black_box(p));
51896 let e = _mm_setr_ps(0.0, 2.0, 0.0, 4.0);
51897 assert_eq_m128(r, e);
51898 }
51899
51900 #[simd_test(enable = "avx512f,avx512vl")]
51901 unsafe fn test_mm_mask_load_ps() {
51902 #[repr(align(16))]
51903 struct Align {
51904 data: [f32; 4], // 16 bytes
51905 }
51906 let src = _mm_set1_ps(42.0);
51907 let a = Align {
51908 data: [1.0_f32, 2.0, 3.0, 4.0],
51909 };
51910 let p = a.data.as_ptr();
51911 let m = 0b1010;
51912 let r = _mm_mask_load_ps(src, m, black_box(p));
51913 let e = _mm_setr_ps(42.0, 2.0, 42.0, 4.0);
51914 assert_eq_m128(r, e);
51915 }
51916
51917 #[simd_test(enable = "avx512f,avx512vl")]
51918 unsafe fn test_mm_maskz_load_ps() {
51919 #[repr(align(16))]
51920 struct Align {
51921 data: [f32; 4], // 16 bytes
51922 }
51923 let a = Align {
51924 data: [1.0_f32, 2.0, 3.0, 4.0],
51925 };
51926 let p = a.data.as_ptr();
51927 let m = 0b1010;
51928 let r = _mm_maskz_load_ps(m, black_box(p));
51929 let e = _mm_setr_ps(0.0, 2.0, 0.0, 4.0);
51930 assert_eq_m128(r, e);
51931 }
51932
51933 #[simd_test(enable = "avx512f,avx512vl")]
51934 unsafe fn test_mm_mask_storeu_ps() {
51935 let mut r = [42_f32; 4];
51936 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
51937 let m = 0b1010;
51938 _mm_mask_storeu_ps(r.as_mut_ptr(), m, a);
51939 let e = _mm_setr_ps(42.0, 2.0, 42.0, 4.0);
51940 assert_eq_m128(_mm_loadu_ps(r.as_ptr()), e);
51941 }
51942
51943 #[simd_test(enable = "avx512f,avx512vl")]
51944 unsafe fn test_mm_mask_store_ps() {
51945 #[repr(align(16))]
51946 struct Align {
51947 data: [f32; 4], // 16 bytes
51948 }
51949 let mut r = Align { data: [42.0; 4] };
51950 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
51951 let m = 0b1010;
51952 _mm_mask_store_ps(r.data.as_mut_ptr(), m, a);
51953 let e = _mm_setr_ps(42.0, 2.0, 42.0, 4.0);
51954 assert_eq_m128(_mm_load_ps(r.data.as_ptr()), e);
51955 }
51956
51957 #[simd_test(enable = "avx512f,avx512vl")]
51958 unsafe fn test_mm_mask_loadu_pd() {
51959 let src = _mm_set1_pd(42.0);
51960 let a = &[1.0_f64, 2.0];
51961 let p = a.as_ptr();
51962 let m = 0b10;
51963 let r = _mm_mask_loadu_pd(src, m, black_box(p));
51964 let e = _mm_setr_pd(42.0, 2.0);
51965 assert_eq_m128d(r, e);
51966 }
51967
51968 #[simd_test(enable = "avx512f,avx512vl")]
51969 unsafe fn test_mm_maskz_loadu_pd() {
51970 let a = &[1.0_f64, 2.0];
51971 let p = a.as_ptr();
51972 let m = 0b10;
51973 let r = _mm_maskz_loadu_pd(m, black_box(p));
51974 let e = _mm_setr_pd(0.0, 2.0);
51975 assert_eq_m128d(r, e);
51976 }
51977
51978 #[simd_test(enable = "avx512f,avx512vl")]
51979 unsafe fn test_mm_mask_load_pd() {
51980 #[repr(align(16))]
51981 struct Align {
51982 data: [f64; 2], // 16 bytes
51983 }
51984 let src = _mm_set1_pd(42.0);
51985 let a = Align {
51986 data: [1.0_f64, 2.0],
51987 };
51988 let p = a.data.as_ptr();
51989 let m = 0b10;
51990 let r = _mm_mask_load_pd(src, m, black_box(p));
51991 let e = _mm_setr_pd(42.0, 2.0);
51992 assert_eq_m128d(r, e);
51993 }
51994
51995 #[simd_test(enable = "avx512f,avx512vl")]
51996 unsafe fn test_mm_maskz_load_pd() {
51997 #[repr(align(16))]
51998 struct Align {
51999 data: [f64; 2], // 16 bytes
52000 }
52001 let a = Align {
52002 data: [1.0_f64, 2.0],
52003 };
52004 let p = a.data.as_ptr();
52005 let m = 0b10;
52006 let r = _mm_maskz_load_pd(m, black_box(p));
52007 let e = _mm_setr_pd(0.0, 2.0);
52008 assert_eq_m128d(r, e);
52009 }
52010
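    // `_mm_mask_load_ss`/`_mm_mask_load_sd` only mask the lowest element: bit 0 of
    // the mask selects between memory and `src` for that element, and the upper
    // elements of the result are zeroed.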
52011 #[simd_test(enable = "avx512f")]
52012 unsafe fn test_mm_mask_load_ss() {
52013 #[repr(align(16))]
52014 struct Align {
52015 data: f32,
52016 }
52017 let src = _mm_set_ss(2.0);
52018 let mem = Align { data: 1.0 };
52019 let r = _mm_mask_load_ss(src, 0b1, &mem.data);
52020 assert_eq_m128(r, _mm_set_ss(1.0));
52021 let r = _mm_mask_load_ss(src, 0b0, &mem.data);
52022 assert_eq_m128(r, _mm_set_ss(2.0));
52023 }
52024
52025 #[simd_test(enable = "avx512f")]
52026 unsafe fn test_mm_maskz_load_ss() {
52027 #[repr(align(16))]
52028 struct Align {
52029 data: f32,
52030 }
52031 let mem = Align { data: 1.0 };
52032 let r = _mm_maskz_load_ss(0b1, &mem.data);
52033 assert_eq_m128(r, _mm_set_ss(1.0));
52034 let r = _mm_maskz_load_ss(0b0, &mem.data);
52035 assert_eq_m128(r, _mm_set_ss(0.0));
52036 }
52037
52038 #[simd_test(enable = "avx512f")]
52039 unsafe fn test_mm_mask_load_sd() {
52040 #[repr(align(16))]
52041 struct Align {
52042 data: f64,
52043 }
52044 let src = _mm_set_sd(2.0);
52045 let mem = Align { data: 1.0 };
52046 let r = _mm_mask_load_sd(src, 0b1, &mem.data);
52047 assert_eq_m128d(r, _mm_set_sd(1.0));
52048 let r = _mm_mask_load_sd(src, 0b0, &mem.data);
52049 assert_eq_m128d(r, _mm_set_sd(2.0));
52050 }
52051
52052 #[simd_test(enable = "avx512f")]
52053 unsafe fn test_mm_maskz_load_sd() {
52054 #[repr(align(16))]
52055 struct Align {
52056 data: f64,
52057 }
52058 let mem = Align { data: 1.0 };
52059 let r = _mm_maskz_load_sd(0b1, &mem.data);
52060 assert_eq_m128d(r, _mm_set_sd(1.0));
52061 let r = _mm_maskz_load_sd(0b0, &mem.data);
52062 assert_eq_m128d(r, _mm_set_sd(0.0));
52063 }
52064
52065 #[simd_test(enable = "avx512f,avx512vl")]
52066 unsafe fn test_mm_mask_storeu_pd() {
52067 let mut r = [42_f64; 2];
52068 let a = _mm_setr_pd(1.0, 2.0);
52069 let m = 0b10;
52070 _mm_mask_storeu_pd(r.as_mut_ptr(), m, a);
52071 let e = _mm_setr_pd(42.0, 2.0);
52072 assert_eq_m128d(_mm_loadu_pd(r.as_ptr()), e);
52073 }
52074
52075 #[simd_test(enable = "avx512f,avx512vl")]
52076 unsafe fn test_mm_mask_store_pd() {
52077 #[repr(align(16))]
52078 struct Align {
52079 data: [f64; 2], // 16 bytes
52080 }
52081 let mut r = Align { data: [42.0; 2] };
52082 let a = _mm_setr_pd(1.0, 2.0);
52083 let m = 0b10;
52084 _mm_mask_store_pd(r.data.as_mut_ptr(), m, a);
52085 let e = _mm_setr_pd(42.0, 2.0);
52086 assert_eq_m128d(_mm_load_pd(r.data.as_ptr()), e);
52087 }
52088
52089 #[simd_test(enable = "avx512f")]
52090 unsafe fn test_mm_mask_store_ss() {
52091 #[repr(align(16))]
52092 struct Align {
52093 data: f32,
52094 }
52095 let a = _mm_set_ss(2.0);
52096 let mut mem = Align { data: 1.0 };
52097 _mm_mask_store_ss(&mut mem.data, 0b1, a);
52098 assert_eq!(mem.data, 2.0);
52099 _mm_mask_store_ss(&mut mem.data, 0b0, a);
52100 assert_eq!(mem.data, 2.0);
52101 }
52102
52103 #[simd_test(enable = "avx512f")]
52104 unsafe fn test_mm_mask_store_sd() {
52105 #[repr(align(16))]
52106 struct Align {
52107 data: f64,
52108 }
52109 let a = _mm_set_sd(2.0);
52110 let mut mem = Align { data: 1.0 };
52111 _mm_mask_store_sd(&mut mem.data, 0b1, a);
52112 assert_eq!(mem.data, 2.0);
52113 _mm_mask_store_sd(&mut mem.data, 0b0, a);
52114 assert_eq!(mem.data, 2.0);
52115 }
52116
52117 #[simd_test(enable = "avx512f")]
52118 unsafe fn test_mm512_setr_pd() {
52119 let r = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.);
52120 assert_eq_m512d(r, _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.));
52121 }
52122
52123 #[simd_test(enable = "avx512f")]
52124 unsafe fn test_mm512_set_pd() {
52125 let r = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
52126 assert_eq_m512d(r, _mm512_set_pd(7., 6., 5., 4., 3., 2., 1., 0.));
52127 }
52128
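    // `rol`/`ror` rotate each 32-bit lane, so bits shifted out of one end re-enter
    // at the other: rotating `1 << 31` left by one yields `1 << 0`, and vice versa.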
52129 #[simd_test(enable = "avx512f")]
52130 unsafe fn test_mm512_rol_epi32() {
52131 let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52132 let r = _mm512_rol_epi32::<1>(a);
52133 let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52134 assert_eq_m512i(r, e);
52135 }
52136
52137 #[simd_test(enable = "avx512f")]
52138 unsafe fn test_mm512_mask_rol_epi32() {
52139 let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52140 let r = _mm512_mask_rol_epi32::<1>(a, 0, a);
52141 assert_eq_m512i(r, a);
52142 let r = _mm512_mask_rol_epi32::<1>(a, 0b11111111_11111111, a);
52143 let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52144 assert_eq_m512i(r, e);
52145 }
52146
52147 #[simd_test(enable = "avx512f")]
52148 unsafe fn test_mm512_maskz_rol_epi32() {
52149 let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
52150 let r = _mm512_maskz_rol_epi32::<1>(0, a);
52151 assert_eq_m512i(r, _mm512_setzero_si512());
52152 let r = _mm512_maskz_rol_epi32::<1>(0b00000000_11111111, a);
52153 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
52154 assert_eq_m512i(r, e);
52155 }
52156
52157 #[simd_test(enable = "avx512f,avx512vl")]
52158 unsafe fn test_mm256_rol_epi32() {
52159 let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52160 let r = _mm256_rol_epi32::<1>(a);
52161 let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52162 assert_eq_m256i(r, e);
52163 }
52164
52165 #[simd_test(enable = "avx512f,avx512vl")]
52166 unsafe fn test_mm256_mask_rol_epi32() {
52167 let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52168 let r = _mm256_mask_rol_epi32::<1>(a, 0, a);
52169 assert_eq_m256i(r, a);
52170 let r = _mm256_mask_rol_epi32::<1>(a, 0b11111111, a);
52171 let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52172 assert_eq_m256i(r, e);
52173 }
52174
52175 #[simd_test(enable = "avx512f,avx512vl")]
52176 unsafe fn test_mm256_maskz_rol_epi32() {
52177 let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52178 let r = _mm256_maskz_rol_epi32::<1>(0, a);
52179 assert_eq_m256i(r, _mm256_setzero_si256());
52180 let r = _mm256_maskz_rol_epi32::<1>(0b11111111, a);
52181 let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52182 assert_eq_m256i(r, e);
52183 }
52184
52185 #[simd_test(enable = "avx512f,avx512vl")]
52186 unsafe fn test_mm_rol_epi32() {
52187 let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52188 let r = _mm_rol_epi32::<1>(a);
52189 let e = _mm_set_epi32(1 << 0, 2, 2, 2);
52190 assert_eq_m128i(r, e);
52191 }
52192
52193 #[simd_test(enable = "avx512f,avx512vl")]
52194 unsafe fn test_mm_mask_rol_epi32() {
52195 let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52196 let r = _mm_mask_rol_epi32::<1>(a, 0, a);
52197 assert_eq_m128i(r, a);
52198 let r = _mm_mask_rol_epi32::<1>(a, 0b00001111, a);
52199 let e = _mm_set_epi32(1 << 0, 2, 2, 2);
52200 assert_eq_m128i(r, e);
52201 }
52202
52203 #[simd_test(enable = "avx512f,avx512vl")]
52204 unsafe fn test_mm_maskz_rol_epi32() {
52205 let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52206 let r = _mm_maskz_rol_epi32::<1>(0, a);
52207 assert_eq_m128i(r, _mm_setzero_si128());
52208 let r = _mm_maskz_rol_epi32::<1>(0b00001111, a);
52209 let e = _mm_set_epi32(1 << 0, 2, 2, 2);
52210 assert_eq_m128i(r, e);
52211 }
52212
52213 #[simd_test(enable = "avx512f")]
52214 unsafe fn test_mm512_ror_epi32() {
52215 let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52216 let r = _mm512_ror_epi32::<1>(a);
52217 let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52218 assert_eq_m512i(r, e);
52219 }
52220
52221 #[simd_test(enable = "avx512f")]
52222 unsafe fn test_mm512_mask_ror_epi32() {
52223 let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52224 let r = _mm512_mask_ror_epi32::<1>(a, 0, a);
52225 assert_eq_m512i(r, a);
52226 let r = _mm512_mask_ror_epi32::<1>(a, 0b11111111_11111111, a);
52227 let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52228 assert_eq_m512i(r, e);
52229 }
52230
52231 #[simd_test(enable = "avx512f")]
52232 unsafe fn test_mm512_maskz_ror_epi32() {
52233 let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
52234 let r = _mm512_maskz_ror_epi32::<1>(0, a);
52235 assert_eq_m512i(r, _mm512_setzero_si512());
52236 let r = _mm512_maskz_ror_epi32::<1>(0b00000000_11111111, a);
52237 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
52238 assert_eq_m512i(r, e);
52239 }
52240
52241 #[simd_test(enable = "avx512f,avx512vl")]
52242 unsafe fn test_mm256_ror_epi32() {
52243 let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52244 let r = _mm256_ror_epi32::<1>(a);
52245 let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52246 assert_eq_m256i(r, e);
52247 }
52248
52249 #[simd_test(enable = "avx512f,avx512vl")]
52250 unsafe fn test_mm256_mask_ror_epi32() {
52251 let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52252 let r = _mm256_mask_ror_epi32::<1>(a, 0, a);
52253 assert_eq_m256i(r, a);
52254 let r = _mm256_mask_ror_epi32::<1>(a, 0b11111111, a);
52255 let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52256 assert_eq_m256i(r, e);
52257 }
52258
52259 #[simd_test(enable = "avx512f,avx512vl")]
52260 unsafe fn test_mm256_maskz_ror_epi32() {
52261 let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52262 let r = _mm256_maskz_ror_epi32::<1>(0, a);
52263 assert_eq_m256i(r, _mm256_setzero_si256());
52264 let r = _mm256_maskz_ror_epi32::<1>(0b11111111, a);
52265 let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52266 assert_eq_m256i(r, e);
52267 }
52268
52269 #[simd_test(enable = "avx512f,avx512vl")]
52270 unsafe fn test_mm_ror_epi32() {
52271 let a = _mm_set_epi32(1 << 0, 2, 2, 2);
52272 let r = _mm_ror_epi32::<1>(a);
52273 let e = _mm_set_epi32(1 << 31, 1, 1, 1);
52274 assert_eq_m128i(r, e);
52275 }
52276
52277 #[simd_test(enable = "avx512f,avx512vl")]
52278 unsafe fn test_mm_mask_ror_epi32() {
52279 let a = _mm_set_epi32(1 << 0, 2, 2, 2);
52280 let r = _mm_mask_ror_epi32::<1>(a, 0, a);
52281 assert_eq_m128i(r, a);
52282 let r = _mm_mask_ror_epi32::<1>(a, 0b00001111, a);
52283 let e = _mm_set_epi32(1 << 31, 1, 1, 1);
52284 assert_eq_m128i(r, e);
52285 }
52286
52287 #[simd_test(enable = "avx512f,avx512vl")]
52288 unsafe fn test_mm_maskz_ror_epi32() {
52289 let a = _mm_set_epi32(1 << 0, 2, 2, 2);
52290 let r = _mm_maskz_ror_epi32::<1>(0, a);
52291 assert_eq_m128i(r, _mm_setzero_si128());
52292 let r = _mm_maskz_ror_epi32::<1>(0b00001111, a);
52293 let e = _mm_set_epi32(1 << 31, 1, 1, 1);
52294 assert_eq_m128i(r, e);
52295 }
52296
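    // Unlike the rotates above, `slli`/`srli` are logical shifts: bits shifted out
    // are discarded, so the `1 << 31` lane becomes 0 after a left shift by one.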
52297 #[simd_test(enable = "avx512f")]
52298 unsafe fn test_mm512_slli_epi32() {
52299 let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52300 let r = _mm512_slli_epi32::<1>(a);
52301 let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52302 assert_eq_m512i(r, e);
52303 }
52304
52305 #[simd_test(enable = "avx512f")]
52306 unsafe fn test_mm512_mask_slli_epi32() {
52307 let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52308 let r = _mm512_mask_slli_epi32::<1>(a, 0, a);
52309 assert_eq_m512i(r, a);
52310 let r = _mm512_mask_slli_epi32::<1>(a, 0b11111111_11111111, a);
52311 let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52312 assert_eq_m512i(r, e);
52313 }
52314
52315 #[simd_test(enable = "avx512f")]
52316 unsafe fn test_mm512_maskz_slli_epi32() {
52317 let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
52318 let r = _mm512_maskz_slli_epi32::<1>(0, a);
52319 assert_eq_m512i(r, _mm512_setzero_si512());
52320 let r = _mm512_maskz_slli_epi32::<1>(0b00000000_11111111, a);
52321 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0);
52322 assert_eq_m512i(r, e);
52323 }
52324
52325 #[simd_test(enable = "avx512f,avx512vl")]
52326 unsafe fn test_mm256_mask_slli_epi32() {
52327 let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52328 let r = _mm256_mask_slli_epi32::<1>(a, 0, a);
52329 assert_eq_m256i(r, a);
52330 let r = _mm256_mask_slli_epi32::<1>(a, 0b11111111, a);
52331 let e = _mm256_set_epi32(0, 2, 2, 2, 2, 2, 2, 2);
52332 assert_eq_m256i(r, e);
52333 }
52334
52335 #[simd_test(enable = "avx512f,avx512vl")]
52336 unsafe fn test_mm256_maskz_slli_epi32() {
52337 let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52338 let r = _mm256_maskz_slli_epi32::<1>(0, a);
52339 assert_eq_m256i(r, _mm256_setzero_si256());
52340 let r = _mm256_maskz_slli_epi32::<1>(0b11111111, a);
52341 let e = _mm256_set_epi32(0, 2, 2, 2, 2, 2, 2, 2);
52342 assert_eq_m256i(r, e);
52343 }
52344
52345 #[simd_test(enable = "avx512f,avx512vl")]
52346 unsafe fn test_mm_mask_slli_epi32() {
52347 let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52348 let r = _mm_mask_slli_epi32::<1>(a, 0, a);
52349 assert_eq_m128i(r, a);
52350 let r = _mm_mask_slli_epi32::<1>(a, 0b00001111, a);
52351 let e = _mm_set_epi32(0, 2, 2, 2);
52352 assert_eq_m128i(r, e);
52353 }
52354
52355 #[simd_test(enable = "avx512f,avx512vl")]
52356 unsafe fn test_mm_maskz_slli_epi32() {
52357 let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52358 let r = _mm_maskz_slli_epi32::<1>(0, a);
52359 assert_eq_m128i(r, _mm_setzero_si128());
52360 let r = _mm_maskz_slli_epi32::<1>(0b00001111, a);
52361 let e = _mm_set_epi32(0, 2, 2, 2);
52362 assert_eq_m128i(r, e);
52363 }
52364
52365 #[simd_test(enable = "avx512f")]
52366 unsafe fn test_mm512_srli_epi32() {
52367 let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52368 let r = _mm512_srli_epi32::<1>(a);
52369 let e = _mm512_set_epi32(0 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52370 assert_eq_m512i(r, e);
52371 }
52372
52373 #[simd_test(enable = "avx512f")]
52374 unsafe fn test_mm512_mask_srli_epi32() {
52375 let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52376 let r = _mm512_mask_srli_epi32::<1>(a, 0, a);
52377 assert_eq_m512i(r, a);
52378 let r = _mm512_mask_srli_epi32::<1>(a, 0b11111111_11111111, a);
52379 let e = _mm512_set_epi32(0 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52380 assert_eq_m512i(r, e);
52381 }
52382
52383 #[simd_test(enable = "avx512f")]
52384 unsafe fn test_mm512_maskz_srli_epi32() {
52385 let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0);
52386 let r = _mm512_maskz_srli_epi32::<1>(0, a);
52387 assert_eq_m512i(r, _mm512_setzero_si512());
52388 let r = _mm512_maskz_srli_epi32::<1>(0b00000000_11111111, a);
52389 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0 << 31);
52390 assert_eq_m512i(r, e);
52391 }
52392
52393 #[simd_test(enable = "avx512f,avx512vl")]
52394 unsafe fn test_mm256_mask_srli_epi32() {
52395 let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
52396 let r = _mm256_mask_srli_epi32::<1>(a, 0, a);
52397 assert_eq_m256i(r, a);
52398 let r = _mm256_mask_srli_epi32::<1>(a, 0b11111111, a);
52399 let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
52400 assert_eq_m256i(r, e);
52401 }
52402
52403 #[simd_test(enable = "avx512f,avx512vl")]
52404 unsafe fn test_mm256_maskz_srli_epi32() {
52405 let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
52406 let r = _mm256_maskz_srli_epi32::<1>(0, a);
52407 assert_eq_m256i(r, _mm256_setzero_si256());
52408 let r = _mm256_maskz_srli_epi32::<1>(0b11111111, a);
52409 let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
52410 assert_eq_m256i(r, e);
52411 }
52412
52413 #[simd_test(enable = "avx512f,avx512vl")]
52414 unsafe fn test_mm_mask_srli_epi32() {
52415 let a = _mm_set_epi32(1 << 5, 0, 0, 0);
52416 let r = _mm_mask_srli_epi32::<1>(a, 0, a);
52417 assert_eq_m128i(r, a);
52418 let r = _mm_mask_srli_epi32::<1>(a, 0b00001111, a);
52419 let e = _mm_set_epi32(1 << 4, 0, 0, 0);
52420 assert_eq_m128i(r, e);
52421 }
52422
52423 #[simd_test(enable = "avx512f,avx512vl")]
52424 unsafe fn test_mm_maskz_srli_epi32() {
52425 let a = _mm_set_epi32(1 << 5, 0, 0, 0);
52426 let r = _mm_maskz_srli_epi32::<1>(0, a);
52427 assert_eq_m128i(r, _mm_setzero_si128());
52428 let r = _mm_maskz_srli_epi32::<1>(0b00001111, a);
52429 let e = _mm_set_epi32(1 << 4, 0, 0, 0);
52430 assert_eq_m128i(r, e);
52431 }
52432
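    // The `rolv`/`rorv` variants take per-lane rotate counts from the second vector
    // operand `b` instead of a compile-time immediate.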
52433 #[simd_test(enable = "avx512f")]
52434 unsafe fn test_mm512_rolv_epi32() {
52435 let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52436 let b = _mm512_set1_epi32(1);
52437 let r = _mm512_rolv_epi32(a, b);
52438 let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52439 assert_eq_m512i(r, e);
52440 }
52441
52442 #[simd_test(enable = "avx512f")]
52443 unsafe fn test_mm512_mask_rolv_epi32() {
52444 let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52445 let b = _mm512_set1_epi32(1);
52446 let r = _mm512_mask_rolv_epi32(a, 0, a, b);
52447 assert_eq_m512i(r, a);
52448 let r = _mm512_mask_rolv_epi32(a, 0b11111111_11111111, a, b);
52449 let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52450 assert_eq_m512i(r, e);
52451 }
52452
52453 #[simd_test(enable = "avx512f")]
52454 unsafe fn test_mm512_maskz_rolv_epi32() {
52455 let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
52456 let b = _mm512_set1_epi32(1);
52457 let r = _mm512_maskz_rolv_epi32(0, a, b);
52458 assert_eq_m512i(r, _mm512_setzero_si512());
52459 let r = _mm512_maskz_rolv_epi32(0b00000000_11111111, a, b);
52460 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
52461 assert_eq_m512i(r, e);
52462 }
52463
52464 #[simd_test(enable = "avx512f,avx512vl")]
52465 unsafe fn test_mm256_rolv_epi32() {
52466 let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52467 let b = _mm256_set1_epi32(1);
52468 let r = _mm256_rolv_epi32(a, b);
52469 let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52470 assert_eq_m256i(r, e);
52471 }
52472
52473 #[simd_test(enable = "avx512f,avx512vl")]
52474 unsafe fn test_mm256_mask_rolv_epi32() {
52475 let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52476 let b = _mm256_set1_epi32(1);
52477 let r = _mm256_mask_rolv_epi32(a, 0, a, b);
52478 assert_eq_m256i(r, a);
52479 let r = _mm256_mask_rolv_epi32(a, 0b11111111, a, b);
52480 let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52481 assert_eq_m256i(r, e);
52482 }
52483
52484 #[simd_test(enable = "avx512f,avx512vl")]
52485 unsafe fn test_mm256_maskz_rolv_epi32() {
52486 let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52487 let b = _mm256_set1_epi32(1);
52488 let r = _mm256_maskz_rolv_epi32(0, a, b);
52489 assert_eq_m256i(r, _mm256_setzero_si256());
52490 let r = _mm256_maskz_rolv_epi32(0b11111111, a, b);
52491 let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52492 assert_eq_m256i(r, e);
52493 }
52494
52495 #[simd_test(enable = "avx512f,avx512vl")]
52496 unsafe fn test_mm_rolv_epi32() {
52497 let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52498 let b = _mm_set1_epi32(1);
52499 let r = _mm_rolv_epi32(a, b);
52500 let e = _mm_set_epi32(1 << 0, 2, 2, 2);
52501 assert_eq_m128i(r, e);
52502 }
52503
52504 #[simd_test(enable = "avx512f,avx512vl")]
52505 unsafe fn test_mm_mask_rolv_epi32() {
52506 let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52507 let b = _mm_set1_epi32(1);
52508 let r = _mm_mask_rolv_epi32(a, 0, a, b);
52509 assert_eq_m128i(r, a);
52510 let r = _mm_mask_rolv_epi32(a, 0b00001111, a, b);
52511 let e = _mm_set_epi32(1 << 0, 2, 2, 2);
52512 assert_eq_m128i(r, e);
52513 }
52514
52515 #[simd_test(enable = "avx512f,avx512vl")]
52516 unsafe fn test_mm_maskz_rolv_epi32() {
52517 let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52518 let b = _mm_set1_epi32(1);
52519 let r = _mm_maskz_rolv_epi32(0, a, b);
52520 assert_eq_m128i(r, _mm_setzero_si128());
52521 let r = _mm_maskz_rolv_epi32(0b00001111, a, b);
52522 let e = _mm_set_epi32(1 << 0, 2, 2, 2);
52523 assert_eq_m128i(r, e);
52524 }
52525
52526 #[simd_test(enable = "avx512f")]
52527 unsafe fn test_mm512_rorv_epi32() {
52528 let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52529 let b = _mm512_set1_epi32(1);
52530 let r = _mm512_rorv_epi32(a, b);
52531 let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52532 assert_eq_m512i(r, e);
52533 }
52534
52535 #[simd_test(enable = "avx512f")]
52536 unsafe fn test_mm512_mask_rorv_epi32() {
52537 let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52538 let b = _mm512_set1_epi32(1);
52539 let r = _mm512_mask_rorv_epi32(a, 0, a, b);
52540 assert_eq_m512i(r, a);
52541 let r = _mm512_mask_rorv_epi32(a, 0b11111111_11111111, a, b);
52542 let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52543 assert_eq_m512i(r, e);
52544 }
52545
52546 #[simd_test(enable = "avx512f")]
52547 unsafe fn test_mm512_maskz_rorv_epi32() {
52548 let a = _mm512_set_epi32(3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
52549 let b = _mm512_set1_epi32(1);
52550 let r = _mm512_maskz_rorv_epi32(0, a, b);
52551 assert_eq_m512i(r, _mm512_setzero_si512());
52552 let r = _mm512_maskz_rorv_epi32(0b00000000_11111111, a, b);
52553 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
52554 assert_eq_m512i(r, e);
52555 }
52556
52557 #[simd_test(enable = "avx512f,avx512vl")]
52558 unsafe fn test_mm256_rorv_epi32() {
52559 let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52560 let b = _mm256_set1_epi32(1);
52561 let r = _mm256_rorv_epi32(a, b);
52562 let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52563 assert_eq_m256i(r, e);
52564 }
52565
52566 #[simd_test(enable = "avx512f,avx512vl")]
52567 unsafe fn test_mm256_mask_rorv_epi32() {
52568 let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52569 let b = _mm256_set1_epi32(1);
52570 let r = _mm256_mask_rorv_epi32(a, 0, a, b);
52571 assert_eq_m256i(r, a);
52572 let r = _mm256_mask_rorv_epi32(a, 0b11111111, a, b);
52573 let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52574 assert_eq_m256i(r, e);
52575 }
52576
52577 #[simd_test(enable = "avx512f,avx512vl")]
52578 unsafe fn test_mm256_maskz_rorv_epi32() {
52579 let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52580 let b = _mm256_set1_epi32(1);
52581 let r = _mm256_maskz_rorv_epi32(0, a, b);
52582 assert_eq_m256i(r, _mm256_setzero_si256());
52583 let r = _mm256_maskz_rorv_epi32(0b11111111, a, b);
52584 let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52585 assert_eq_m256i(r, e);
52586 }
52587
52588 #[simd_test(enable = "avx512f,avx512vl")]
52589 unsafe fn test_mm_rorv_epi32() {
52590 let a = _mm_set_epi32(1 << 0, 2, 2, 2);
52591 let b = _mm_set1_epi32(1);
52592 let r = _mm_rorv_epi32(a, b);
52593 let e = _mm_set_epi32(1 << 31, 1, 1, 1);
52594 assert_eq_m128i(r, e);
52595 }
52596
52597 #[simd_test(enable = "avx512f,avx512vl")]
52598 unsafe fn test_mm_mask_rorv_epi32() {
52599 let a = _mm_set_epi32(1 << 0, 2, 2, 2);
52600 let b = _mm_set1_epi32(1);
52601 let r = _mm_mask_rorv_epi32(a, 0, a, b);
52602 assert_eq_m128i(r, a);
52603 let r = _mm_mask_rorv_epi32(a, 0b00001111, a, b);
52604 let e = _mm_set_epi32(1 << 31, 1, 1, 1);
52605 assert_eq_m128i(r, e);
52606 }
52607
52608 #[simd_test(enable = "avx512f,avx512vl")]
52609 unsafe fn test_mm_maskz_rorv_epi32() {
52610 let a = _mm_set_epi32(1 << 0, 2, 2, 2);
52611 let b = _mm_set1_epi32(1);
52612 let r = _mm_maskz_rorv_epi32(0, a, b);
52613 assert_eq_m128i(r, _mm_setzero_si128());
52614 let r = _mm_maskz_rorv_epi32(0b00001111, a, b);
52615 let e = _mm_set_epi32(1 << 31, 1, 1, 1);
52616 assert_eq_m128i(r, e);
52617 }
52618
52619 #[simd_test(enable = "avx512f")]
52620 unsafe fn test_mm512_sllv_epi32() {
52621 let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52622 let count = _mm512_set1_epi32(1);
52623 let r = _mm512_sllv_epi32(a, count);
52624 let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52625 assert_eq_m512i(r, e);
52626 }
52627
52628 #[simd_test(enable = "avx512f")]
52629 unsafe fn test_mm512_mask_sllv_epi32() {
52630 let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52631 let count = _mm512_set1_epi32(1);
52632 let r = _mm512_mask_sllv_epi32(a, 0, a, count);
52633 assert_eq_m512i(r, a);
52634 let r = _mm512_mask_sllv_epi32(a, 0b11111111_11111111, a, count);
52635 let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52636 assert_eq_m512i(r, e);
52637 }
52638
52639 #[simd_test(enable = "avx512f")]
52640 unsafe fn test_mm512_maskz_sllv_epi32() {
52641 let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
52642 let count = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52643 let r = _mm512_maskz_sllv_epi32(0, a, count);
52644 assert_eq_m512i(r, _mm512_setzero_si512());
52645 let r = _mm512_maskz_sllv_epi32(0b00000000_11111111, a, count);
52646 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0);
52647 assert_eq_m512i(r, e);
52648 }
52649
52650 #[simd_test(enable = "avx512f,avx512vl")]
52651 unsafe fn test_mm256_mask_sllv_epi32() {
52652 let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52653 let count = _mm256_set1_epi32(1);
52654 let r = _mm256_mask_sllv_epi32(a, 0, a, count);
52655 assert_eq_m256i(r, a);
52656 let r = _mm256_mask_sllv_epi32(a, 0b11111111, a, count);
52657 let e = _mm256_set_epi32(0, 2, 2, 2, 2, 2, 2, 2);
52658 assert_eq_m256i(r, e);
52659 }
52660
52661 #[simd_test(enable = "avx512f,avx512vl")]
52662 unsafe fn test_mm256_maskz_sllv_epi32() {
52663 let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52664 let count = _mm256_set1_epi32(1);
52665 let r = _mm256_maskz_sllv_epi32(0, a, count);
52666 assert_eq_m256i(r, _mm256_setzero_si256());
52667 let r = _mm256_maskz_sllv_epi32(0b11111111, a, count);
52668 let e = _mm256_set_epi32(0, 2, 2, 2, 2, 2, 2, 2);
52669 assert_eq_m256i(r, e);
52670 }
52671
52672 #[simd_test(enable = "avx512f,avx512vl")]
52673 unsafe fn test_mm_mask_sllv_epi32() {
52674 let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52675 let count = _mm_set1_epi32(1);
52676 let r = _mm_mask_sllv_epi32(a, 0, a, count);
52677 assert_eq_m128i(r, a);
52678 let r = _mm_mask_sllv_epi32(a, 0b00001111, a, count);
52679 let e = _mm_set_epi32(0, 2, 2, 2);
52680 assert_eq_m128i(r, e);
52681 }
52682
52683 #[simd_test(enable = "avx512f,avx512vl")]
52684 unsafe fn test_mm_maskz_sllv_epi32() {
52685 let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52686 let count = _mm_set1_epi32(1);
52687 let r = _mm_maskz_sllv_epi32(0, a, count);
52688 assert_eq_m128i(r, _mm_setzero_si128());
52689 let r = _mm_maskz_sllv_epi32(0b00001111, a, count);
52690 let e = _mm_set_epi32(0, 2, 2, 2);
52691 assert_eq_m128i(r, e);
52692 }
52693
52694 #[simd_test(enable = "avx512f")]
52695 unsafe fn test_mm512_srlv_epi32() {
52696 let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52697 let count = _mm512_set1_epi32(1);
52698 let r = _mm512_srlv_epi32(a, count);
52699 let e = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52700 assert_eq_m512i(r, e);
52701 }
52702
52703 #[simd_test(enable = "avx512f")]
52704 unsafe fn test_mm512_mask_srlv_epi32() {
52705 let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52706 let count = _mm512_set1_epi32(1);
52707 let r = _mm512_mask_srlv_epi32(a, 0, a, count);
52708 assert_eq_m512i(r, a);
52709 let r = _mm512_mask_srlv_epi32(a, 0b11111111_11111111, a, count);
52710 let e = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52711 assert_eq_m512i(r, e);
52712 }
52713
52714 #[simd_test(enable = "avx512f")]
52715 unsafe fn test_mm512_maskz_srlv_epi32() {
52716 let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0);
52717 let count = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52718 let r = _mm512_maskz_srlv_epi32(0, a, count);
52719 assert_eq_m512i(r, _mm512_setzero_si512());
52720 let r = _mm512_maskz_srlv_epi32(0b00000000_11111111, a, count);
52721 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0);
52722 assert_eq_m512i(r, e);
52723 }
52724
52725 #[simd_test(enable = "avx512f,avx512vl")]
52726 unsafe fn test_mm256_mask_srlv_epi32() {
52727 let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
52728 let count = _mm256_set1_epi32(1);
52729 let r = _mm256_mask_srlv_epi32(a, 0, a, count);
52730 assert_eq_m256i(r, a);
52731 let r = _mm256_mask_srlv_epi32(a, 0b11111111, a, count);
52732 let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
52733 assert_eq_m256i(r, e);
52734 }
52735
52736 #[simd_test(enable = "avx512f,avx512vl")]
52737 unsafe fn test_mm256_maskz_srlv_epi32() {
52738 let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
52739 let count = _mm256_set1_epi32(1);
52740 let r = _mm256_maskz_srlv_epi32(0, a, count);
52741 assert_eq_m256i(r, _mm256_setzero_si256());
52742 let r = _mm256_maskz_srlv_epi32(0b11111111, a, count);
52743 let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
52744 assert_eq_m256i(r, e);
52745 }
52746
52747 #[simd_test(enable = "avx512f,avx512vl")]
52748 unsafe fn test_mm_mask_srlv_epi32() {
52749 let a = _mm_set_epi32(1 << 5, 0, 0, 0);
52750 let count = _mm_set1_epi32(1);
52751 let r = _mm_mask_srlv_epi32(a, 0, a, count);
52752 assert_eq_m128i(r, a);
52753 let r = _mm_mask_srlv_epi32(a, 0b00001111, a, count);
52754 let e = _mm_set_epi32(1 << 4, 0, 0, 0);
52755 assert_eq_m128i(r, e);
52756 }
52757
52758 #[simd_test(enable = "avx512f,avx512vl")]
52759 unsafe fn test_mm_maskz_srlv_epi32() {
52760 let a = _mm_set_epi32(1 << 5, 0, 0, 0);
52761 let count = _mm_set1_epi32(1);
52762 let r = _mm_maskz_srlv_epi32(0, a, count);
52763 assert_eq_m128i(r, _mm_setzero_si128());
52764 let r = _mm_maskz_srlv_epi32(0b00001111, a, count);
52765 let e = _mm_set_epi32(1 << 4, 0, 0, 0);
52766 assert_eq_m128i(r, e);
52767 }
52768
52769 #[simd_test(enable = "avx512f")]
52770 unsafe fn test_mm512_sll_epi32() {
52771 #[rustfmt::skip]
52772 let a = _mm512_set_epi32(
52773 1 << 31, 1 << 0, 1 << 1, 1 << 2,
52774 0, 0, 0, 0,
52775 0, 0, 0, 0,
52776 0, 0, 0, 0,
52777 );
52778 let count = _mm_set_epi32(0, 0, 0, 2);
52779 let r = _mm512_sll_epi32(a, count);
52780 #[rustfmt::skip]
52781 let e = _mm512_set_epi32(
52782 0, 1 << 2, 1 << 3, 1 << 4,
52783 0, 0, 0, 0,
52784 0, 0, 0, 0,
52785 0, 0, 0, 0,
52786 );
52787 assert_eq_m512i(r, e);
52788 }
52789
52790 #[simd_test(enable = "avx512f")]
52791 unsafe fn test_mm512_mask_sll_epi32() {
52792 #[rustfmt::skip]
52793 let a = _mm512_set_epi32(
52794 1 << 31, 1 << 0, 1 << 1, 1 << 2,
52795 0, 0, 0, 0,
52796 0, 0, 0, 0,
52797 0, 0, 0, 0,
52798 );
52799 let count = _mm_set_epi32(0, 0, 0, 2);
52800 let r = _mm512_mask_sll_epi32(a, 0, a, count);
52801 assert_eq_m512i(r, a);
52802 let r = _mm512_mask_sll_epi32(a, 0b11111111_11111111, a, count);
52803 #[rustfmt::skip]
52804 let e = _mm512_set_epi32(
52805 0, 1 << 2, 1 << 3, 1 << 4,
52806 0, 0, 0, 0,
52807 0, 0, 0, 0,
52808 0, 0, 0, 0,
52809 );
52810 assert_eq_m512i(r, e);
52811 }
52812
52813 #[simd_test(enable = "avx512f")]
52814 unsafe fn test_mm512_maskz_sll_epi32() {
52815 #[rustfmt::skip]
52816 let a = _mm512_set_epi32(
52817 1 << 31, 1 << 0, 1 << 1, 1 << 2,
52818 0, 0, 0, 0,
52819 0, 0, 0, 0,
52820 0, 0, 0, 1 << 31,
52821 );
52822 let count = _mm_set_epi32(2, 0, 0, 2);
52823 let r = _mm512_maskz_sll_epi32(0, a, count);
52824 assert_eq_m512i(r, _mm512_setzero_si512());
52825 let r = _mm512_maskz_sll_epi32(0b00000000_11111111, a, count);
52826 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
52827 assert_eq_m512i(r, e);
52828 }
52829
52830 #[simd_test(enable = "avx512f,avx512vl")]
52831 unsafe fn test_mm256_mask_sll_epi32() {
52832 let a = _mm256_set_epi32(1 << 13, 0, 0, 0, 0, 0, 0, 0);
52833 let count = _mm_set_epi32(0, 0, 0, 1);
52834 let r = _mm256_mask_sll_epi32(a, 0, a, count);
52835 assert_eq_m256i(r, a);
52836 let r = _mm256_mask_sll_epi32(a, 0b11111111, a, count);
52837 let e = _mm256_set_epi32(1 << 14, 0, 0, 0, 0, 0, 0, 0);
52838 assert_eq_m256i(r, e);
52839 }
52840
52841 #[simd_test(enable = "avx512f,avx512vl")]
52842 unsafe fn test_mm256_maskz_sll_epi32() {
52843 let a = _mm256_set_epi32(1 << 13, 0, 0, 0, 0, 0, 0, 0);
52844 let count = _mm_set_epi32(0, 0, 0, 1);
52845 let r = _mm256_maskz_sll_epi32(0, a, count);
52846 assert_eq_m256i(r, _mm256_setzero_si256());
52847 let r = _mm256_maskz_sll_epi32(0b11111111, a, count);
52848 let e = _mm256_set_epi32(1 << 14, 0, 0, 0, 0, 0, 0, 0);
52849 assert_eq_m256i(r, e);
52850 }
52851
52852 #[simd_test(enable = "avx512f,avx512vl")]
52853 unsafe fn test_mm_mask_sll_epi32() {
52854 let a = _mm_set_epi32(1 << 13, 0, 0, 0);
52855 let count = _mm_set_epi32(0, 0, 0, 1);
52856 let r = _mm_mask_sll_epi32(a, 0, a, count);
52857 assert_eq_m128i(r, a);
52858 let r = _mm_mask_sll_epi32(a, 0b00001111, a, count);
52859 let e = _mm_set_epi32(1 << 14, 0, 0, 0);
52860 assert_eq_m128i(r, e);
52861 }
52862
52863 #[simd_test(enable = "avx512f,avx512vl")]
52864 unsafe fn test_mm_maskz_sll_epi32() {
52865 let a = _mm_set_epi32(1 << 13, 0, 0, 0);
52866 let count = _mm_set_epi32(0, 0, 0, 1);
52867 let r = _mm_maskz_sll_epi32(0, a, count);
52868 assert_eq_m128i(r, _mm_setzero_si128());
52869 let r = _mm_maskz_sll_epi32(0b00001111, a, count);
52870 let e = _mm_set_epi32(1 << 14, 0, 0, 0);
52871 assert_eq_m128i(r, e);
52872 }
52873
52874 #[simd_test(enable = "avx512f")]
52875 unsafe fn test_mm512_srl_epi32() {
52876 #[rustfmt::skip]
52877 let a = _mm512_set_epi32(
52878 1 << 31, 1 << 0, 1 << 1, 1 << 2,
52879 0, 0, 0, 0,
52880 0, 0, 0, 0,
52881 0, 0, 0, 0,
52882 );
52883 let count = _mm_set_epi32(0, 0, 0, 2);
52884 let r = _mm512_srl_epi32(a, count);
52885 let e = _mm512_set_epi32(1 << 29, 0, 0, 1 << 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
52886 assert_eq_m512i(r, e);
52887 }
52888
52889 #[simd_test(enable = "avx512f")]
52890 unsafe fn test_mm512_mask_srl_epi32() {
52891 #[rustfmt::skip]
52892 let a = _mm512_set_epi32(
52893 1 << 31, 1 << 0, 1 << 1, 1 << 2,
52894 0, 0, 0, 0,
52895 0, 0, 0, 0,
52896 0, 0, 0, 0,
52897 );
52898 let count = _mm_set_epi32(0, 0, 0, 2);
52899 let r = _mm512_mask_srl_epi32(a, 0, a, count);
52900 assert_eq_m512i(r, a);
52901 let r = _mm512_mask_srl_epi32(a, 0b11111111_11111111, a, count);
52902 let e = _mm512_set_epi32(1 << 29, 0, 0, 1 << 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
52903 assert_eq_m512i(r, e);
52904 }
52905
52906 #[simd_test(enable = "avx512f")]
52907 unsafe fn test_mm512_maskz_srl_epi32() {
52908 #[rustfmt::skip]
52909 let a = _mm512_set_epi32(
52910 1 << 31, 1 << 0, 1 << 1, 1 << 2,
52911 0, 0, 0, 0,
52912 0, 0, 0, 0,
52913 0, 0, 0, 1 << 31,
52914 );
52915 let count = _mm_set_epi32(2, 0, 0, 2);
52916 let r = _mm512_maskz_srl_epi32(0, a, count);
52917 assert_eq_m512i(r, _mm512_setzero_si512());
52918 let r = _mm512_maskz_srl_epi32(0b00000000_11111111, a, count);
52919 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 29);
52920 assert_eq_m512i(r, e);
52921 }
52922
52923 #[simd_test(enable = "avx512f,avx512vl")]
52924 unsafe fn test_mm256_mask_srl_epi32() {
52925 let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
52926 let count = _mm_set_epi32(0, 0, 0, 1);
52927 let r = _mm256_mask_srl_epi32(a, 0, a, count);
52928 assert_eq_m256i(r, a);
52929 let r = _mm256_mask_srl_epi32(a, 0b11111111, a, count);
52930 let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
52931 assert_eq_m256i(r, e);
52932 }
52933
52934 #[simd_test(enable = "avx512f,avx512vl")]
52935 unsafe fn test_mm256_maskz_srl_epi32() {
52936 let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
52937 let count = _mm_set_epi32(0, 0, 0, 1);
52938 let r = _mm256_maskz_srl_epi32(0, a, count);
52939 assert_eq_m256i(r, _mm256_setzero_si256());
52940 let r = _mm256_maskz_srl_epi32(0b11111111, a, count);
52941 let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
52942 assert_eq_m256i(r, e);
52943 }
52944
52945 #[simd_test(enable = "avx512f,avx512vl")]
52946 unsafe fn test_mm_mask_srl_epi32() {
52947 let a = _mm_set_epi32(1 << 5, 0, 0, 0);
52948 let count = _mm_set_epi32(0, 0, 0, 1);
52949 let r = _mm_mask_srl_epi32(a, 0, a, count);
52950 assert_eq_m128i(r, a);
52951 let r = _mm_mask_srl_epi32(a, 0b00001111, a, count);
52952 let e = _mm_set_epi32(1 << 4, 0, 0, 0);
52953 assert_eq_m128i(r, e);
52954 }
52955
52956 #[simd_test(enable = "avx512f,avx512vl")]
52957 unsafe fn test_mm_maskz_srl_epi32() {
52958 let a = _mm_set_epi32(1 << 5, 0, 0, 0);
52959 let count = _mm_set_epi32(0, 0, 0, 1);
52960 let r = _mm_maskz_srl_epi32(0, a, count);
52961 assert_eq_m128i(r, _mm_setzero_si128());
52962 let r = _mm_maskz_srl_epi32(0b00001111, a, count);
52963 let e = _mm_set_epi32(1 << 4, 0, 0, 0);
52964 assert_eq_m128i(r, e);
52965 }
52966
52967 #[simd_test(enable = "avx512f")]
52968 unsafe fn test_mm512_sra_epi32() {
52969 let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
52970 let count = _mm_set_epi32(1, 0, 0, 2);
52971 let r = _mm512_sra_epi32(a, count);
52972 let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
52973 assert_eq_m512i(r, e);
52974 }
52975
52976 #[simd_test(enable = "avx512f")]
52977 unsafe fn test_mm512_mask_sra_epi32() {
52978 let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16);
52979 let count = _mm_set_epi32(0, 0, 0, 2);
52980 let r = _mm512_mask_sra_epi32(a, 0, a, count);
52981 assert_eq_m512i(r, a);
52982 let r = _mm512_mask_sra_epi32(a, 0b11111111_11111111, a, count);
52983 let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4);
52984 assert_eq_m512i(r, e);
52985 }
52986
52987 #[simd_test(enable = "avx512f")]
52988 unsafe fn test_mm512_maskz_sra_epi32() {
52989 let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -15, -14);
52990 let count = _mm_set_epi32(2, 0, 0, 2);
52991 let r = _mm512_maskz_sra_epi32(0, a, count);
52992 assert_eq_m512i(r, _mm512_setzero_si512());
52993 let r = _mm512_maskz_sra_epi32(0b00000000_11111111, a, count);
52994 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -4);
52995 assert_eq_m512i(r, e);
52996 }
52997
52998 #[simd_test(enable = "avx512f,avx512vl")]
52999 unsafe fn test_mm256_mask_sra_epi32() {
53000 let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
53001 let count = _mm_set_epi32(0, 0, 0, 1);
53002 let r = _mm256_mask_sra_epi32(a, 0, a, count);
53003 assert_eq_m256i(r, a);
53004 let r = _mm256_mask_sra_epi32(a, 0b11111111, a, count);
53005 let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
53006 assert_eq_m256i(r, e);
53007 }
53008
53009 #[simd_test(enable = "avx512f,avx512vl")]
53010 unsafe fn test_mm256_maskz_sra_epi32() {
53011 let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
53012 let count = _mm_set_epi32(0, 0, 0, 1);
53013 let r = _mm256_maskz_sra_epi32(0, a, count);
53014 assert_eq_m256i(r, _mm256_setzero_si256());
53015 let r = _mm256_maskz_sra_epi32(0b11111111, a, count);
53016 let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
53017 assert_eq_m256i(r, e);
53018 }
53019
53020 #[simd_test(enable = "avx512f,avx512vl")]
53021 unsafe fn test_mm_mask_sra_epi32() {
53022 let a = _mm_set_epi32(1 << 5, 0, 0, 0);
53023 let count = _mm_set_epi32(0, 0, 0, 1);
53024 let r = _mm_mask_sra_epi32(a, 0, a, count);
53025 assert_eq_m128i(r, a);
53026 let r = _mm_mask_sra_epi32(a, 0b00001111, a, count);
53027 let e = _mm_set_epi32(1 << 4, 0, 0, 0);
53028 assert_eq_m128i(r, e);
53029 }
53030
53031 #[simd_test(enable = "avx512f,avx512vl")]
53032 unsafe fn test_mm_maskz_sra_epi32() {
53033 let a = _mm_set_epi32(1 << 5, 0, 0, 0);
53034 let count = _mm_set_epi32(0, 0, 0, 1);
53035 let r = _mm_maskz_sra_epi32(0, a, count);
53036 assert_eq_m128i(r, _mm_setzero_si128());
53037 let r = _mm_maskz_sra_epi32(0b00001111, a, count);
53038 let e = _mm_set_epi32(1 << 4, 0, 0, 0);
53039 assert_eq_m128i(r, e);
53040 }
53041
53042 #[simd_test(enable = "avx512f")]
53043 unsafe fn test_mm512_srav_epi32() {
53044 let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
53045 let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
53046 let r = _mm512_srav_epi32(a, count);
53047 let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
53048 assert_eq_m512i(r, e);
53049 }
53050
53051 #[simd_test(enable = "avx512f")]
53052 unsafe fn test_mm512_mask_srav_epi32() {
53053 let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16);
53054 let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
53055 let r = _mm512_mask_srav_epi32(a, 0, a, count);
53056 assert_eq_m512i(r, a);
53057 let r = _mm512_mask_srav_epi32(a, 0b11111111_11111111, a, count);
53058 let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8);
53059 assert_eq_m512i(r, e);
53060 }
53061
53062 #[simd_test(enable = "avx512f")]
53063 unsafe fn test_mm512_maskz_srav_epi32() {
53064 let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -15, -14);
53065 let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2);
53066 let r = _mm512_maskz_srav_epi32(0, a, count);
53067 assert_eq_m512i(r, _mm512_setzero_si512());
53068 let r = _mm512_maskz_srav_epi32(0b00000000_11111111, a, count);
53069 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -4);
53070 assert_eq_m512i(r, e);
53071 }
53072
53073 #[simd_test(enable = "avx512f,avx512vl")]
53074 unsafe fn test_mm256_mask_srav_epi32() {
53075 let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
53076 let count = _mm256_set1_epi32(1);
53077 let r = _mm256_mask_srav_epi32(a, 0, a, count);
53078 assert_eq_m256i(r, a);
53079 let r = _mm256_mask_srav_epi32(a, 0b11111111, a, count);
53080 let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
53081 assert_eq_m256i(r, e);
53082 }
53083
53084 #[simd_test(enable = "avx512f,avx512vl")]
53085 unsafe fn test_mm256_maskz_srav_epi32() {
53086 let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
53087 let count = _mm256_set1_epi32(1);
53088 let r = _mm256_maskz_srav_epi32(0, a, count);
53089 assert_eq_m256i(r, _mm256_setzero_si256());
53090 let r = _mm256_maskz_srav_epi32(0b11111111, a, count);
53091 let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
53092 assert_eq_m256i(r, e);
53093 }
53094
53095 #[simd_test(enable = "avx512f,avx512vl")]
53096 unsafe fn test_mm_mask_srav_epi32() {
53097 let a = _mm_set_epi32(1 << 5, 0, 0, 0);
53098 let count = _mm_set1_epi32(1);
53099 let r = _mm_mask_srav_epi32(a, 0, a, count);
53100 assert_eq_m128i(r, a);
53101 let r = _mm_mask_srav_epi32(a, 0b00001111, a, count);
53102 let e = _mm_set_epi32(1 << 4, 0, 0, 0);
53103 assert_eq_m128i(r, e);
53104 }
53105
53106 #[simd_test(enable = "avx512f,avx512vl")]
53107 unsafe fn test_mm_maskz_srav_epi32() {
53108 let a = _mm_set_epi32(1 << 5, 0, 0, 0);
53109 let count = _mm_set1_epi32(1);
53110 let r = _mm_maskz_srav_epi32(0, a, count);
53111 assert_eq_m128i(r, _mm_setzero_si128());
53112 let r = _mm_maskz_srav_epi32(0b00001111, a, count);
53113 let e = _mm_set_epi32(1 << 4, 0, 0, 0);
53114 assert_eq_m128i(r, e);
53115 }
53116
53117 #[simd_test(enable = "avx512f")]
53118 unsafe fn test_mm512_srai_epi32() {
53119 let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, -15);
53120 let r = _mm512_srai_epi32::<2>(a);
53121 let e = _mm512_set_epi32(2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, -4);
53122 assert_eq_m512i(r, e);
53123 }
53124
53125 #[simd_test(enable = "avx512f")]
53126 unsafe fn test_mm512_mask_srai_epi32() {
53127 let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, -15);
53128 let r = _mm512_mask_srai_epi32::<2>(a, 0, a);
53129 assert_eq_m512i(r, a);
53130 let r = _mm512_mask_srai_epi32::<2>(a, 0b11111111_11111111, a);
53131 let e = _mm512_set_epi32(2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, -4);
53132 assert_eq_m512i(r, e);
53133 }
53134
53135 #[simd_test(enable = "avx512f")]
53136 unsafe fn test_mm512_maskz_srai_epi32() {
53137 let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, -15);
53138 let r = _mm512_maskz_srai_epi32::<2>(0, a);
53139 assert_eq_m512i(r, _mm512_setzero_si512());
53140 let r = _mm512_maskz_srai_epi32::<2>(0b00000000_11111111, a);
53141 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, -4);
53142 assert_eq_m512i(r, e);
53143 }
53144
53145 #[simd_test(enable = "avx512f,avx512vl")]
53146 unsafe fn test_mm256_mask_srai_epi32() {
53147 let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
53148 let r = _mm256_mask_srai_epi32::<1>(a, 0, a);
53149 assert_eq_m256i(r, a);
53150 let r = _mm256_mask_srai_epi32::<1>(a, 0b11111111, a);
53151 let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
53152 assert_eq_m256i(r, e);
53153 }
53154
53155 #[simd_test(enable = "avx512f,avx512vl")]
53156 unsafe fn test_mm256_maskz_srai_epi32() {
53157 let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
53158 let r = _mm256_maskz_srai_epi32::<1>(0, a);
53159 assert_eq_m256i(r, _mm256_setzero_si256());
53160 let r = _mm256_maskz_srai_epi32::<1>(0b11111111, a);
53161 let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
53162 assert_eq_m256i(r, e);
53163 }
53164
53165 #[simd_test(enable = "avx512f,avx512vl")]
53166 unsafe fn test_mm_mask_srai_epi32() {
53167 let a = _mm_set_epi32(1 << 5, 0, 0, 0);
53168 let r = _mm_mask_srai_epi32::<1>(a, 0, a);
53169 assert_eq_m128i(r, a);
53170 let r = _mm_mask_srai_epi32::<1>(a, 0b00001111, a);
53171 let e = _mm_set_epi32(1 << 4, 0, 0, 0);
53172 assert_eq_m128i(r, e);
53173 }
53174
53175 #[simd_test(enable = "avx512f,avx512vl")]
53176 unsafe fn test_mm_maskz_srai_epi32() {
53177 let a = _mm_set_epi32(1 << 5, 0, 0, 0);
53178 let r = _mm_maskz_srai_epi32::<1>(0, a);
53179 assert_eq_m128i(r, _mm_setzero_si128());
53180 let r = _mm_maskz_srai_epi32::<1>(0b00001111, a);
53181 let e = _mm_set_epi32(1 << 4, 0, 0, 0);
53182 assert_eq_m128i(r, e);
53183 }
53184
53185 #[simd_test(enable = "avx512f")]
53186 unsafe fn test_mm512_permute_ps() {
53187 let a = _mm512_setr_ps(
53188 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53189 );
53190 let r = _mm512_permute_ps::<0b11_11_11_11>(a);
53191 let e = _mm512_setr_ps(
53192 3., 3., 3., 3., 7., 7., 7., 7., 11., 11., 11., 11., 15., 15., 15., 15.,
53193 );
53194 assert_eq_m512(r, e);
53195 }
53196
53197 #[simd_test(enable = "avx512f")]
53198 unsafe fn test_mm512_mask_permute_ps() {
53199 let a = _mm512_setr_ps(
53200 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53201 );
53202 let r = _mm512_mask_permute_ps::<0b11_11_11_11>(a, 0, a);
53203 assert_eq_m512(r, a);
53204 let r = _mm512_mask_permute_ps::<0b11_11_11_11>(a, 0b11111111_11111111, a);
53205 let e = _mm512_setr_ps(
53206 3., 3., 3., 3., 7., 7., 7., 7., 11., 11., 11., 11., 15., 15., 15., 15.,
53207 );
53208 assert_eq_m512(r, e);
53209 }
53210
53211 #[simd_test(enable = "avx512f")]
53212 unsafe fn test_mm512_maskz_permute_ps() {
53213 let a = _mm512_setr_ps(
53214 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53215 );
53216 let r = _mm512_maskz_permute_ps::<0b11_11_11_11>(0, a);
53217 assert_eq_m512(r, _mm512_setzero_ps());
53218 let r = _mm512_maskz_permute_ps::<0b11_11_11_11>(0b11111111_11111111, a);
53219 let e = _mm512_setr_ps(
53220 3., 3., 3., 3., 7., 7., 7., 7., 11., 11., 11., 11., 15., 15., 15., 15.,
53221 );
53222 assert_eq_m512(r, e);
53223 }
53224
53225 #[simd_test(enable = "avx512f,avx512vl")]
53226 unsafe fn test_mm256_mask_permute_ps() {
53227 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53228 let r = _mm256_mask_permute_ps::<0b11_11_11_11>(a, 0, a);
53229 assert_eq_m256(r, a);
53230 let r = _mm256_mask_permute_ps::<0b11_11_11_11>(a, 0b11111111, a);
53231 let e = _mm256_set_ps(0., 0., 0., 0., 4., 4., 4., 4.);
53232 assert_eq_m256(r, e);
53233 }
53234
53235 #[simd_test(enable = "avx512f,avx512vl")]
53236 unsafe fn test_mm256_maskz_permute_ps() {
53237 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53238 let r = _mm256_maskz_permute_ps::<0b11_11_11_11>(0, a);
53239 assert_eq_m256(r, _mm256_setzero_ps());
53240 let r = _mm256_maskz_permute_ps::<0b11_11_11_11>(0b11111111, a);
53241 let e = _mm256_set_ps(0., 0., 0., 0., 4., 4., 4., 4.);
53242 assert_eq_m256(r, e);
53243 }
53244
53245 #[simd_test(enable = "avx512f,avx512vl")]
53246 unsafe fn test_mm_mask_permute_ps() {
53247 let a = _mm_set_ps(0., 1., 2., 3.);
53248 let r = _mm_mask_permute_ps::<0b11_11_11_11>(a, 0, a);
53249 assert_eq_m128(r, a);
53250 let r = _mm_mask_permute_ps::<0b11_11_11_11>(a, 0b00001111, a);
53251 let e = _mm_set_ps(0., 0., 0., 0.);
53252 assert_eq_m128(r, e);
53253 }
53254
53255 #[simd_test(enable = "avx512f,avx512vl")]
53256 unsafe fn test_mm_maskz_permute_ps() {
53257 let a = _mm_set_ps(0., 1., 2., 3.);
53258 let r = _mm_maskz_permute_ps::<0b11_11_11_11>(0, a);
53259 assert_eq_m128(r, _mm_setzero_ps());
53260 let r = _mm_maskz_permute_ps::<0b11_11_11_11>(0b00001111, a);
53261 let e = _mm_set_ps(0., 0., 0., 0.);
53262 assert_eq_m128(r, e);
53263 }
53264
53265 #[simd_test(enable = "avx512f")]
53266 unsafe fn test_mm512_permutevar_epi32() {
53267 let idx = _mm512_set1_epi32(1);
53268 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
53269 let r = _mm512_permutevar_epi32(idx, a);
53270 let e = _mm512_set1_epi32(14);
53271 assert_eq_m512i(r, e);
53272 }
53273
53274 #[simd_test(enable = "avx512f")]
53275 unsafe fn test_mm512_mask_permutevar_epi32() {
53276 let idx = _mm512_set1_epi32(1);
53277 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
53278 let r = _mm512_mask_permutevar_epi32(a, 0, idx, a);
53279 assert_eq_m512i(r, a);
53280 let r = _mm512_mask_permutevar_epi32(a, 0b11111111_11111111, idx, a);
53281 let e = _mm512_set1_epi32(14);
53282 assert_eq_m512i(r, e);
53283 }
53284
53285 #[simd_test(enable = "avx512f")]
53286 unsafe fn test_mm512_permutevar_ps() {
53287 let a = _mm512_set_ps(
53288 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53289 );
53290 let b = _mm512_set1_epi32(0b01);
53291 let r = _mm512_permutevar_ps(a, b);
53292 let e = _mm512_set_ps(
53293 2., 2., 2., 2., 6., 6., 6., 6., 10., 10., 10., 10., 14., 14., 14., 14.,
53294 );
53295 assert_eq_m512(r, e);
53296 }
53297
53298 #[simd_test(enable = "avx512f")]
53299 unsafe fn test_mm512_mask_permutevar_ps() {
53300 let a = _mm512_set_ps(
53301 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53302 );
53303 let b = _mm512_set1_epi32(0b01);
53304 let r = _mm512_mask_permutevar_ps(a, 0, a, b);
53305 assert_eq_m512(r, a);
53306 let r = _mm512_mask_permutevar_ps(a, 0b11111111_11111111, a, b);
53307 let e = _mm512_set_ps(
53308 2., 2., 2., 2., 6., 6., 6., 6., 10., 10., 10., 10., 14., 14., 14., 14.,
53309 );
53310 assert_eq_m512(r, e);
53311 }
53312
53313 #[simd_test(enable = "avx512f")]
53314 unsafe fn test_mm512_maskz_permutevar_ps() {
53315 let a = _mm512_set_ps(
53316 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53317 );
53318 let b = _mm512_set1_epi32(0b01);
53319 let r = _mm512_maskz_permutevar_ps(0, a, b);
53320 assert_eq_m512(r, _mm512_setzero_ps());
53321 let r = _mm512_maskz_permutevar_ps(0b00000000_11111111, a, b);
53322 let e = _mm512_set_ps(
53323 0., 0., 0., 0., 0., 0., 0., 0., 10., 10., 10., 10., 14., 14., 14., 14.,
53324 );
53325 assert_eq_m512(r, e);
53326 }
53327
53328 #[simd_test(enable = "avx512f,avx512vl")]
53329 unsafe fn test_mm256_mask_permutevar_ps() {
53330 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53331 let b = _mm256_set1_epi32(0b01);
53332 let r = _mm256_mask_permutevar_ps(a, 0, a, b);
53333 assert_eq_m256(r, a);
53334 let r = _mm256_mask_permutevar_ps(a, 0b11111111, a, b);
53335 let e = _mm256_set_ps(2., 2., 2., 2., 6., 6., 6., 6.);
53336 assert_eq_m256(r, e);
53337 }
53338
53339 #[simd_test(enable = "avx512f,avx512vl")]
53340 unsafe fn test_mm256_maskz_permutevar_ps() {
53341 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53342 let b = _mm256_set1_epi32(0b01);
53343 let r = _mm256_maskz_permutevar_ps(0, a, b);
53344 assert_eq_m256(r, _mm256_setzero_ps());
53345 let r = _mm256_maskz_permutevar_ps(0b11111111, a, b);
53346 let e = _mm256_set_ps(2., 2., 2., 2., 6., 6., 6., 6.);
53347 assert_eq_m256(r, e);
53348 }
53349
53350 #[simd_test(enable = "avx512f,avx512vl")]
53351 unsafe fn test_mm_mask_permutevar_ps() {
53352 let a = _mm_set_ps(0., 1., 2., 3.);
53353 let b = _mm_set1_epi32(0b01);
53354 let r = _mm_mask_permutevar_ps(a, 0, a, b);
53355 assert_eq_m128(r, a);
53356 let r = _mm_mask_permutevar_ps(a, 0b00001111, a, b);
53357 let e = _mm_set_ps(2., 2., 2., 2.);
53358 assert_eq_m128(r, e);
53359 }
53360
53361 #[simd_test(enable = "avx512f,avx512vl")]
53362 unsafe fn test_mm_maskz_permutevar_ps() {
53363 let a = _mm_set_ps(0., 1., 2., 3.);
53364 let b = _mm_set1_epi32(0b01);
53365 let r = _mm_maskz_permutevar_ps(0, a, b);
53366 assert_eq_m128(r, _mm_setzero_ps());
53367 let r = _mm_maskz_permutevar_ps(0b00001111, a, b);
53368 let e = _mm_set_ps(2., 2., 2., 2.);
53369 assert_eq_m128(r, e);
53370 }
53371
53372 #[simd_test(enable = "avx512f")]
53373 unsafe fn test_mm512_permutexvar_epi32() {
53374 let idx = _mm512_set1_epi32(1);
53375 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
53376 let r = _mm512_permutexvar_epi32(idx, a);
53377 let e = _mm512_set1_epi32(14);
53378 assert_eq_m512i(r, e);
53379 }
53380
53381 #[simd_test(enable = "avx512f")]
53382 unsafe fn test_mm512_mask_permutexvar_epi32() {
53383 let idx = _mm512_set1_epi32(1);
53384 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
53385 let r = _mm512_mask_permutexvar_epi32(a, 0, idx, a);
53386 assert_eq_m512i(r, a);
53387 let r = _mm512_mask_permutexvar_epi32(a, 0b11111111_11111111, idx, a);
53388 let e = _mm512_set1_epi32(14);
53389 assert_eq_m512i(r, e);
53390 }
53391
53392 #[simd_test(enable = "avx512f")]
53393 unsafe fn test_mm512_maskz_permutexvar_epi32() {
53394 let idx = _mm512_set1_epi32(1);
53395 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
53396 let r = _mm512_maskz_permutexvar_epi32(0, idx, a);
53397 assert_eq_m512i(r, _mm512_setzero_si512());
53398 let r = _mm512_maskz_permutexvar_epi32(0b00000000_11111111, idx, a);
53399 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14);
53400 assert_eq_m512i(r, e);
53401 }
53402
53403 #[simd_test(enable = "avx512f,avx512vl")]
53404 unsafe fn test_mm256_permutexvar_epi32() {
53405 let idx = _mm256_set1_epi32(1);
53406 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
53407 let r = _mm256_permutexvar_epi32(idx, a);
53408 let e = _mm256_set1_epi32(6);
53409 assert_eq_m256i(r, e);
53410 }
53411
53412 #[simd_test(enable = "avx512f,avx512vl")]
53413 unsafe fn test_mm256_mask_permutexvar_epi32() {
53414 let idx = _mm256_set1_epi32(1);
53415 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
53416 let r = _mm256_mask_permutexvar_epi32(a, 0, idx, a);
53417 assert_eq_m256i(r, a);
53418 let r = _mm256_mask_permutexvar_epi32(a, 0b11111111, idx, a);
53419 let e = _mm256_set1_epi32(6);
53420 assert_eq_m256i(r, e);
53421 }
53422
53423 #[simd_test(enable = "avx512f,avx512vl")]
53424 unsafe fn test_mm256_maskz_permutexvar_epi32() {
53425 let idx = _mm256_set1_epi32(1);
53426 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
53427 let r = _mm256_maskz_permutexvar_epi32(0, idx, a);
53428 assert_eq_m256i(r, _mm256_setzero_si256());
53429 let r = _mm256_maskz_permutexvar_epi32(0b11111111, idx, a);
53430 let e = _mm256_set1_epi32(6);
53431 assert_eq_m256i(r, e);
53432 }
53433
53434 #[simd_test(enable = "avx512f")]
53435 unsafe fn test_mm512_permutexvar_ps() {
53436 let idx = _mm512_set1_epi32(1);
53437 let a = _mm512_set_ps(
53438 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53439 );
53440 let r = _mm512_permutexvar_ps(idx, a);
53441 let e = _mm512_set1_ps(14.);
53442 assert_eq_m512(r, e);
53443 }
53444
53445 #[simd_test(enable = "avx512f")]
53446 unsafe fn test_mm512_mask_permutexvar_ps() {
53447 let idx = _mm512_set1_epi32(1);
53448 let a = _mm512_set_ps(
53449 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53450 );
53451 let r = _mm512_mask_permutexvar_ps(a, 0, idx, a);
53452 assert_eq_m512(r, a);
53453 let r = _mm512_mask_permutexvar_ps(a, 0b11111111_11111111, idx, a);
53454 let e = _mm512_set1_ps(14.);
53455 assert_eq_m512(r, e);
53456 }
53457
53458 #[simd_test(enable = "avx512f")]
53459 unsafe fn test_mm512_maskz_permutexvar_ps() {
53460 let idx = _mm512_set1_epi32(1);
53461 let a = _mm512_set_ps(
53462 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53463 );
53464 let r = _mm512_maskz_permutexvar_ps(0, idx, a);
53465 assert_eq_m512(r, _mm512_setzero_ps());
53466 let r = _mm512_maskz_permutexvar_ps(0b00000000_11111111, idx, a);
53467 let e = _mm512_set_ps(
53468 0., 0., 0., 0., 0., 0., 0., 0., 14., 14., 14., 14., 14., 14., 14., 14.,
53469 );
53470 assert_eq_m512(r, e);
53471 }
53472
53473 #[simd_test(enable = "avx512f,avx512vl")]
53474 unsafe fn test_mm256_permutexvar_ps() {
53475 let idx = _mm256_set1_epi32(1);
53476 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53477 let r = _mm256_permutexvar_ps(idx, a);
53478 let e = _mm256_set1_ps(6.);
53479 assert_eq_m256(r, e);
53480 }
53481
53482 #[simd_test(enable = "avx512f,avx512vl")]
53483 unsafe fn test_mm256_mask_permutexvar_ps() {
53484 let idx = _mm256_set1_epi32(1);
53485 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53486 let r = _mm256_mask_permutexvar_ps(a, 0, idx, a);
53487 assert_eq_m256(r, a);
53488 let r = _mm256_mask_permutexvar_ps(a, 0b11111111, idx, a);
53489 let e = _mm256_set1_ps(6.);
53490 assert_eq_m256(r, e);
53491 }
53492
53493 #[simd_test(enable = "avx512f,avx512vl")]
53494 unsafe fn test_mm256_maskz_permutexvar_ps() {
53495 let idx = _mm256_set1_epi32(1);
53496 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53497 let r = _mm256_maskz_permutexvar_ps(0, idx, a);
53498 assert_eq_m256(r, _mm256_setzero_ps());
53499 let r = _mm256_maskz_permutexvar_ps(0b11111111, idx, a);
53500 let e = _mm256_set1_ps(6.);
53501 assert_eq_m256(r, e);
53502 }
53503
53504 #[simd_test(enable = "avx512f")]
53505 unsafe fn test_mm512_permutex2var_epi32() {
53506 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
53507 #[rustfmt::skip]
53508 let idx = _mm512_set_epi32(
53509 1, 1 << 4, 2, 1 << 4,
53510 3, 1 << 4, 4, 1 << 4,
53511 5, 1 << 4, 6, 1 << 4,
53512 7, 1 << 4, 8, 1 << 4,
53513 );
53514 let b = _mm512_set1_epi32(100);
53515 let r = _mm512_permutex2var_epi32(a, idx, b);
53516 let e = _mm512_set_epi32(
53517 14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
53518 );
53519 assert_eq_m512i(r, e);
53520 }
53521
53522 #[simd_test(enable = "avx512f")]
53523 unsafe fn test_mm512_mask_permutex2var_epi32() {
53524 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
53525 #[rustfmt::skip]
53526 let idx = _mm512_set_epi32(
53527 1, 1 << 4, 2, 1 << 4,
53528 3, 1 << 4, 4, 1 << 4,
53529 5, 1 << 4, 6, 1 << 4,
53530 7, 1 << 4, 8, 1 << 4,
53531 );
53532 let b = _mm512_set1_epi32(100);
53533 let r = _mm512_mask_permutex2var_epi32(a, 0, idx, b);
53534 assert_eq_m512i(r, a);
53535 let r = _mm512_mask_permutex2var_epi32(a, 0b11111111_11111111, idx, b);
53536 let e = _mm512_set_epi32(
53537 14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
53538 );
53539 assert_eq_m512i(r, e);
53540 }
53541
53542 #[simd_test(enable = "avx512f")]
53543 unsafe fn test_mm512_maskz_permutex2var_epi32() {
53544 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
53545 #[rustfmt::skip]
53546 let idx = _mm512_set_epi32(
53547 1, 1 << 4, 2, 1 << 4,
53548 3, 1 << 4, 4, 1 << 4,
53549 5, 1 << 4, 6, 1 << 4,
53550 7, 1 << 4, 8, 1 << 4,
53551 );
53552 let b = _mm512_set1_epi32(100);
53553 let r = _mm512_maskz_permutex2var_epi32(0, a, idx, b);
53554 assert_eq_m512i(r, _mm512_setzero_si512());
53555 let r = _mm512_maskz_permutex2var_epi32(0b00000000_11111111, a, idx, b);
53556 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 10, 100, 9, 100, 8, 100, 7, 100);
53557 assert_eq_m512i(r, e);
53558 }
53559
53560 #[simd_test(enable = "avx512f")]
53561 unsafe fn test_mm512_mask2_permutex2var_epi32() {
53562 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
53563 #[rustfmt::skip]
53564 let idx = _mm512_set_epi32(
53565 1000, 1 << 4, 2000, 1 << 4,
53566 3000, 1 << 4, 4000, 1 << 4,
53567 5, 1 << 4, 6, 1 << 4,
53568 7, 1 << 4, 8, 1 << 4,
53569 );
53570 let b = _mm512_set1_epi32(100);
53571 let r = _mm512_mask2_permutex2var_epi32(a, idx, 0, b);
53572 assert_eq_m512i(r, idx);
53573 let r = _mm512_mask2_permutex2var_epi32(a, idx, 0b00000000_11111111, b);
53574 #[rustfmt::skip]
53575 let e = _mm512_set_epi32(
53576 1000, 1 << 4, 2000, 1 << 4,
53577 3000, 1 << 4, 4000, 1 << 4,
53578 10, 100, 9, 100,
53579 8, 100, 7, 100,
53580 );
53581 assert_eq_m512i(r, e);
53582 }
53583
53584 #[simd_test(enable = "avx512f,avx512vl")]
53585 unsafe fn test_mm256_permutex2var_epi32() {
53586 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
53587 let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
53588 let b = _mm256_set1_epi32(100);
53589 let r = _mm256_permutex2var_epi32(a, idx, b);
53590 let e = _mm256_set_epi32(6, 100, 5, 100, 4, 100, 3, 100);
53591 assert_eq_m256i(r, e);
53592 }
53593
53594 #[simd_test(enable = "avx512f,avx512vl")]
53595 unsafe fn test_mm256_mask_permutex2var_epi32() {
53596 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
53597 let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
53598 let b = _mm256_set1_epi32(100);
53599 let r = _mm256_mask_permutex2var_epi32(a, 0, idx, b);
53600 assert_eq_m256i(r, a);
53601 let r = _mm256_mask_permutex2var_epi32(a, 0b11111111, idx, b);
53602 let e = _mm256_set_epi32(6, 100, 5, 100, 4, 100, 3, 100);
53603 assert_eq_m256i(r, e);
53604 }
53605
53606 #[simd_test(enable = "avx512f,avx512vl")]
53607 unsafe fn test_mm256_maskz_permutex2var_epi32() {
53608 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
53609 let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
53610 let b = _mm256_set1_epi32(100);
53611 let r = _mm256_maskz_permutex2var_epi32(0, a, idx, b);
53612 assert_eq_m256i(r, _mm256_setzero_si256());
53613 let r = _mm256_maskz_permutex2var_epi32(0b11111111, a, idx, b);
53614 let e = _mm256_set_epi32(6, 100, 5, 100, 4, 100, 3, 100);
53615 assert_eq_m256i(r, e);
53616 }
53617
53618 #[simd_test(enable = "avx512f,avx512vl")]
53619 unsafe fn test_mm256_mask2_permutex2var_epi32() {
53620 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
53621 let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
53622 let b = _mm256_set1_epi32(100);
53623 let r = _mm256_mask2_permutex2var_epi32(a, idx, 0, b);
53624 assert_eq_m256i(r, idx);
53625 let r = _mm256_mask2_permutex2var_epi32(a, idx, 0b11111111, b);
53626 let e = _mm256_set_epi32(6, 100, 5, 100, 4, 100, 3, 100);
53627 assert_eq_m256i(r, e);
53628 }
53629
53630 #[simd_test(enable = "avx512f,avx512vl")]
53631 unsafe fn test_mm_permutex2var_epi32() {
53632 let a = _mm_set_epi32(0, 1, 2, 3);
53633 let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
53634 let b = _mm_set1_epi32(100);
53635 let r = _mm_permutex2var_epi32(a, idx, b);
53636 let e = _mm_set_epi32(2, 100, 1, 100);
53637 assert_eq_m128i(r, e);
53638 }
53639
53640 #[simd_test(enable = "avx512f,avx512vl")]
53641 unsafe fn test_mm_mask_permutex2var_epi32() {
53642 let a = _mm_set_epi32(0, 1, 2, 3);
53643 let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
53644 let b = _mm_set1_epi32(100);
53645 let r = _mm_mask_permutex2var_epi32(a, 0, idx, b);
53646 assert_eq_m128i(r, a);
53647 let r = _mm_mask_permutex2var_epi32(a, 0b00001111, idx, b);
53648 let e = _mm_set_epi32(2, 100, 1, 100);
53649 assert_eq_m128i(r, e);
53650 }
53651
53652 #[simd_test(enable = "avx512f,avx512vl")]
53653 unsafe fn test_mm_maskz_permutex2var_epi32() {
53654 let a = _mm_set_epi32(0, 1, 2, 3);
53655 let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
53656 let b = _mm_set1_epi32(100);
53657 let r = _mm_maskz_permutex2var_epi32(0, a, idx, b);
53658 assert_eq_m128i(r, _mm_setzero_si128());
53659 let r = _mm_maskz_permutex2var_epi32(0b00001111, a, idx, b);
53660 let e = _mm_set_epi32(2, 100, 1, 100);
53661 assert_eq_m128i(r, e);
53662 }
53663
53664 #[simd_test(enable = "avx512f,avx512vl")]
53665 unsafe fn test_mm_mask2_permutex2var_epi32() {
53666 let a = _mm_set_epi32(0, 1, 2, 3);
53667 let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
53668 let b = _mm_set1_epi32(100);
53669 let r = _mm_mask2_permutex2var_epi32(a, idx, 0, b);
53670 assert_eq_m128i(r, idx);
53671 let r = _mm_mask2_permutex2var_epi32(a, idx, 0b00001111, b);
53672 let e = _mm_set_epi32(2, 100, 1, 100);
53673 assert_eq_m128i(r, e);
53674 }
53675
53676 #[simd_test(enable = "avx512f")]
53677 unsafe fn test_mm512_permutex2var_ps() {
53678 let a = _mm512_set_ps(
53679 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53680 );
53681 #[rustfmt::skip]
53682 let idx = _mm512_set_epi32(
53683 1, 1 << 4, 2, 1 << 4,
53684 3, 1 << 4, 4, 1 << 4,
53685 5, 1 << 4, 6, 1 << 4,
53686 7, 1 << 4, 8, 1 << 4,
53687 );
53688 let b = _mm512_set1_ps(100.);
53689 let r = _mm512_permutex2var_ps(a, idx, b);
53690 let e = _mm512_set_ps(
53691 14., 100., 13., 100., 12., 100., 11., 100., 10., 100., 9., 100., 8., 100., 7., 100.,
53692 );
53693 assert_eq_m512(r, e);
53694 }
53695
53696 #[simd_test(enable = "avx512f")]
53697 unsafe fn test_mm512_mask_permutex2var_ps() {
53698 let a = _mm512_set_ps(
53699 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53700 );
53701 #[rustfmt::skip]
53702 let idx = _mm512_set_epi32(
53703 1, 1 << 4, 2, 1 << 4,
53704 3, 1 << 4, 4, 1 << 4,
53705 5, 1 << 4, 6, 1 << 4,
53706 7, 1 << 4, 8, 1 << 4,
53707 );
53708 let b = _mm512_set1_ps(100.);
53709 let r = _mm512_mask_permutex2var_ps(a, 0, idx, b);
53710 assert_eq_m512(r, a);
53711 let r = _mm512_mask_permutex2var_ps(a, 0b11111111_11111111, idx, b);
53712 let e = _mm512_set_ps(
53713 14., 100., 13., 100., 12., 100., 11., 100., 10., 100., 9., 100., 8., 100., 7., 100.,
53714 );
53715 assert_eq_m512(r, e);
53716 }
53717
53718 #[simd_test(enable = "avx512f")]
53719 unsafe fn test_mm512_maskz_permutex2var_ps() {
53720 let a = _mm512_set_ps(
53721 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53722 );
53723 #[rustfmt::skip]
53724 let idx = _mm512_set_epi32(
53725 1, 1 << 4, 2, 1 << 4,
53726 3, 1 << 4, 4, 1 << 4,
53727 5, 1 << 4, 6, 1 << 4,
53728 7, 1 << 4, 8, 1 << 4,
53729 );
53730 let b = _mm512_set1_ps(100.);
53731 let r = _mm512_maskz_permutex2var_ps(0, a, idx, b);
53732 assert_eq_m512(r, _mm512_setzero_ps());
53733 let r = _mm512_maskz_permutex2var_ps(0b00000000_11111111, a, idx, b);
53734 let e = _mm512_set_ps(
53735 0., 0., 0., 0., 0., 0., 0., 0., 10., 100., 9., 100., 8., 100., 7., 100.,
53736 );
53737 assert_eq_m512(r, e);
53738 }
53739
53740 #[simd_test(enable = "avx512f")]
53741 unsafe fn test_mm512_mask2_permutex2var_ps() {
53742 let a = _mm512_set_ps(
53743 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53744 );
53745 #[rustfmt::skip]
53746 let idx = _mm512_set_epi32(
53747 1, 1 << 4, 2, 1 << 4,
53748 3, 1 << 4, 4, 1 << 4,
53749 5, 1 << 4, 6, 1 << 4,
53750 7, 1 << 4, 8, 1 << 4,
53751 );
53752 let b = _mm512_set1_ps(100.);
53753 let r = _mm512_mask2_permutex2var_ps(a, idx, 0, b);
53754 assert_eq_m512(r, _mm512_castsi512_ps(idx));
53755 let r = _mm512_mask2_permutex2var_ps(a, idx, 0b11111111_11111111, b);
53756 let e = _mm512_set_ps(
53757 14., 100., 13., 100., 12., 100., 11., 100., 10., 100., 9., 100., 8., 100., 7., 100.,
53758 );
53759 assert_eq_m512(r, e);
53760 }
53761
53762 #[simd_test(enable = "avx512f,avx512vl")]
53763 unsafe fn test_mm256_permutex2var_ps() {
53764 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53765 let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
53766 let b = _mm256_set1_ps(100.);
53767 let r = _mm256_permutex2var_ps(a, idx, b);
53768 let e = _mm256_set_ps(6., 100., 5., 100., 4., 100., 3., 100.);
53769 assert_eq_m256(r, e);
53770 }
53771
53772 #[simd_test(enable = "avx512f,avx512vl")]
53773 unsafe fn test_mm256_mask_permutex2var_ps() {
53774 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53775 let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
53776 let b = _mm256_set1_ps(100.);
53777 let r = _mm256_mask_permutex2var_ps(a, 0, idx, b);
53778 assert_eq_m256(r, a);
53779 let r = _mm256_mask_permutex2var_ps(a, 0b11111111, idx, b);
53780 let e = _mm256_set_ps(6., 100., 5., 100., 4., 100., 3., 100.);
53781 assert_eq_m256(r, e);
53782 }
53783
53784 #[simd_test(enable = "avx512f,avx512vl")]
53785 unsafe fn test_mm256_maskz_permutex2var_ps() {
53786 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53787 let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
53788 let b = _mm256_set1_ps(100.);
53789 let r = _mm256_maskz_permutex2var_ps(0, a, idx, b);
53790 assert_eq_m256(r, _mm256_setzero_ps());
53791 let r = _mm256_maskz_permutex2var_ps(0b11111111, a, idx, b);
53792 let e = _mm256_set_ps(6., 100., 5., 100., 4., 100., 3., 100.);
53793 assert_eq_m256(r, e);
53794 }
53795
53796 #[simd_test(enable = "avx512f,avx512vl")]
53797 unsafe fn test_mm256_mask2_permutex2var_ps() {
53798 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53799 let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
53800 let b = _mm256_set1_ps(100.);
53801 let r = _mm256_mask2_permutex2var_ps(a, idx, 0, b);
53802 assert_eq_m256(r, _mm256_castsi256_ps(idx));
53803 let r = _mm256_mask2_permutex2var_ps(a, idx, 0b11111111, b);
53804 let e = _mm256_set_ps(6., 100., 5., 100., 4., 100., 3., 100.);
53805 assert_eq_m256(r, e);
53806 }
53807
53808 #[simd_test(enable = "avx512f,avx512vl")]
53809 unsafe fn test_mm_permutex2var_ps() {
53810 let a = _mm_set_ps(0., 1., 2., 3.);
53811 let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
53812 let b = _mm_set1_ps(100.);
53813 let r = _mm_permutex2var_ps(a, idx, b);
53814 let e = _mm_set_ps(2., 100., 1., 100.);
53815 assert_eq_m128(r, e);
53816 }
53817
53818 #[simd_test(enable = "avx512f,avx512vl")]
53819 unsafe fn test_mm_mask_permutex2var_ps() {
53820 let a = _mm_set_ps(0., 1., 2., 3.);
53821 let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
53822 let b = _mm_set1_ps(100.);
53823 let r = _mm_mask_permutex2var_ps(a, 0, idx, b);
53824 assert_eq_m128(r, a);
53825 let r = _mm_mask_permutex2var_ps(a, 0b00001111, idx, b);
53826 let e = _mm_set_ps(2., 100., 1., 100.);
53827 assert_eq_m128(r, e);
53828 }
53829
53830 #[simd_test(enable = "avx512f,avx512vl")]
53831 unsafe fn test_mm_maskz_permutex2var_ps() {
53832 let a = _mm_set_ps(0., 1., 2., 3.);
53833 let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
53834 let b = _mm_set1_ps(100.);
53835 let r = _mm_maskz_permutex2var_ps(0, a, idx, b);
53836 assert_eq_m128(r, _mm_setzero_ps());
53837 let r = _mm_maskz_permutex2var_ps(0b00001111, a, idx, b);
53838 let e = _mm_set_ps(2., 100., 1., 100.);
53839 assert_eq_m128(r, e);
53840 }
53841
53842 #[simd_test(enable = "avx512f,avx512vl")]
53843 unsafe fn test_mm_mask2_permutex2var_ps() {
53844 let a = _mm_set_ps(0., 1., 2., 3.);
53845 let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
53846 let b = _mm_set1_ps(100.);
53847 let r = _mm_mask2_permutex2var_ps(a, idx, 0, b);
53848 assert_eq_m128(r, _mm_castsi128_ps(idx));
53849 let r = _mm_mask2_permutex2var_ps(a, idx, 0b00001111, b);
53850 let e = _mm_set_ps(2., 100., 1., 100.);
53851 assert_eq_m128(r, e);
53852 }
53853
53854 #[simd_test(enable = "avx512f")]
53855 unsafe fn test_mm512_shuffle_epi32() {
53856 let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
53857 let r = _mm512_shuffle_epi32::<_MM_PERM_AADD>(a);
53858 let e = _mm512_setr_epi32(8, 8, 1, 1, 16, 16, 9, 9, 8, 8, 1, 1, 16, 16, 9, 9);
53859 assert_eq_m512i(r, e);
53860 }
53861
53862 #[simd_test(enable = "avx512f")]
53863 unsafe fn test_mm512_mask_shuffle_epi32() {
53864 let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
53865 let r = _mm512_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0, a);
53866 assert_eq_m512i(r, a);
53867 let r = _mm512_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0b11111111_11111111, a);
53868 let e = _mm512_setr_epi32(8, 8, 1, 1, 16, 16, 9, 9, 8, 8, 1, 1, 16, 16, 9, 9);
53869 assert_eq_m512i(r, e);
53870 }
53871
53872 #[simd_test(enable = "avx512f")]
53873 unsafe fn test_mm512_maskz_shuffle_epi32() {
53874 let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
53875 let r = _mm512_maskz_shuffle_epi32::<_MM_PERM_AADD>(0, a);
53876 assert_eq_m512i(r, _mm512_setzero_si512());
53877 let r = _mm512_maskz_shuffle_epi32::<_MM_PERM_AADD>(0b00000000_11111111, a);
53878 let e = _mm512_setr_epi32(8, 8, 1, 1, 16, 16, 9, 9, 0, 0, 0, 0, 0, 0, 0, 0);
53879 assert_eq_m512i(r, e);
53880 }
53881
53882 #[simd_test(enable = "avx512f,avx512vl")]
53883 unsafe fn test_mm256_mask_shuffle_epi32() {
53884 let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
53885 let r = _mm256_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0, a);
53886 assert_eq_m256i(r, a);
53887 let r = _mm256_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0b11111111, a);
53888 let e = _mm256_set_epi32(8, 8, 1, 1, 16, 16, 9, 9);
53889 assert_eq_m256i(r, e);
53890 }
53891
53892 #[simd_test(enable = "avx512f,avx512vl")]
53893 unsafe fn test_mm256_maskz_shuffle_epi32() {
53894 let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
53895 let r = _mm256_maskz_shuffle_epi32::<_MM_PERM_AADD>(0, a);
53896 assert_eq_m256i(r, _mm256_setzero_si256());
53897 let r = _mm256_maskz_shuffle_epi32::<_MM_PERM_AADD>(0b11111111, a);
53898 let e = _mm256_set_epi32(8, 8, 1, 1, 16, 16, 9, 9);
53899 assert_eq_m256i(r, e);
53900 }
53901
53902 #[simd_test(enable = "avx512f,avx512vl")]
53903 unsafe fn test_mm_mask_shuffle_epi32() {
53904 let a = _mm_set_epi32(1, 4, 5, 8);
53905 let r = _mm_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0, a);
53906 assert_eq_m128i(r, a);
53907 let r = _mm_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0b00001111, a);
53908 let e = _mm_set_epi32(8, 8, 1, 1);
53909 assert_eq_m128i(r, e);
53910 }
53911
53912 #[simd_test(enable = "avx512f,avx512vl")]
53913 unsafe fn test_mm_maskz_shuffle_epi32() {
53914 let a = _mm_set_epi32(1, 4, 5, 8);
53915 let r = _mm_maskz_shuffle_epi32::<_MM_PERM_AADD>(0, a);
53916 assert_eq_m128i(r, _mm_setzero_si128());
53917 let r = _mm_maskz_shuffle_epi32::<_MM_PERM_AADD>(0b00001111, a);
53918 let e = _mm_set_epi32(8, 8, 1, 1);
53919 assert_eq_m128i(r, e);
53920 }
53921
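    // For shuffle_ps the two low 2-bit fields of the control pick elements from `a` and the
    // two high fields pick from `b`, per 128-bit lane; 0b00_00_11_11 yields [a3, a3, b0, b0].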
53922 #[simd_test(enable = "avx512f")]
53923 unsafe fn test_mm512_shuffle_ps() {
53924 let a = _mm512_setr_ps(
53925 1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
53926 );
53927 let b = _mm512_setr_ps(
53928 2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
53929 );
53930 let r = _mm512_shuffle_ps::<0b00_00_11_11>(a, b);
53931 let e = _mm512_setr_ps(
53932 8., 8., 2., 2., 16., 16., 10., 10., 8., 8., 2., 2., 16., 16., 10., 10.,
53933 );
53934 assert_eq_m512(r, e);
53935 }
53936
53937 #[simd_test(enable = "avx512f")]
53938 unsafe fn test_mm512_mask_shuffle_ps() {
53939 let a = _mm512_setr_ps(
53940 1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
53941 );
53942 let b = _mm512_setr_ps(
53943 2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
53944 );
53945 let r = _mm512_mask_shuffle_ps::<0b00_00_11_11>(a, 0, a, b);
53946 assert_eq_m512(r, a);
53947 let r = _mm512_mask_shuffle_ps::<0b00_00_11_11>(a, 0b11111111_11111111, a, b);
53948 let e = _mm512_setr_ps(
53949 8., 8., 2., 2., 16., 16., 10., 10., 8., 8., 2., 2., 16., 16., 10., 10.,
53950 );
53951 assert_eq_m512(r, e);
53952 }
53953
53954 #[simd_test(enable = "avx512f")]
53955 unsafe fn test_mm512_maskz_shuffle_ps() {
53956 let a = _mm512_setr_ps(
53957 1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
53958 );
53959 let b = _mm512_setr_ps(
53960 2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
53961 );
53962 let r = _mm512_maskz_shuffle_ps::<0b00_00_11_11>(0, a, b);
53963 assert_eq_m512(r, _mm512_setzero_ps());
53964 let r = _mm512_maskz_shuffle_ps::<0b00_00_11_11>(0b00000000_11111111, a, b);
53965 let e = _mm512_setr_ps(
53966 8., 8., 2., 2., 16., 16., 10., 10., 0., 0., 0., 0., 0., 0., 0., 0.,
53967 );
53968 assert_eq_m512(r, e);
53969 }
53970
53971 #[simd_test(enable = "avx512f,avx512vl")]
53972 unsafe fn test_mm256_mask_shuffle_ps() {
53973 let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
53974 let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
53975 let r = _mm256_mask_shuffle_ps::<0b11_11_11_11>(a, 0, a, b);
53976 assert_eq_m256(r, a);
53977 let r = _mm256_mask_shuffle_ps::<0b00_00_11_11>(a, 0b11111111, a, b);
53978 let e = _mm256_set_ps(7., 7., 1., 1., 15., 15., 9., 9.);
53979 assert_eq_m256(r, e);
53980 }
53981
53982 #[simd_test(enable = "avx512f,avx512vl")]
53983 unsafe fn test_mm256_maskz_shuffle_ps() {
53984 let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
53985 let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
53986 let r = _mm256_maskz_shuffle_ps::<0b11_11_11_11>(0, a, b);
53987 assert_eq_m256(r, _mm256_setzero_ps());
53988 let r = _mm256_maskz_shuffle_ps::<0b00_00_11_11>(0b11111111, a, b);
53989 let e = _mm256_set_ps(7., 7., 1., 1., 15., 15., 9., 9.);
53990 assert_eq_m256(r, e);
53991 }
53992
53993 #[simd_test(enable = "avx512f,avx512vl")]
53994 unsafe fn test_mm_mask_shuffle_ps() {
53995 let a = _mm_set_ps(1., 4., 5., 8.);
53996 let b = _mm_set_ps(2., 3., 6., 7.);
53997 let r = _mm_mask_shuffle_ps::<0b11_11_11_11>(a, 0, a, b);
53998 assert_eq_m128(r, a);
53999 let r = _mm_mask_shuffle_ps::<0b00_00_11_11>(a, 0b00001111, a, b);
54000 let e = _mm_set_ps(7., 7., 1., 1.);
54001 assert_eq_m128(r, e);
54002 }
54003
54004 #[simd_test(enable = "avx512f,avx512vl")]
54005 unsafe fn test_mm_maskz_shuffle_ps() {
54006 let a = _mm_set_ps(1., 4., 5., 8.);
54007 let b = _mm_set_ps(2., 3., 6., 7.);
54008 let r = _mm_maskz_shuffle_ps::<0b11_11_11_11>(0, a, b);
54009 assert_eq_m128(r, _mm_setzero_ps());
54010 let r = _mm_maskz_shuffle_ps::<0b00_00_11_11>(0b00001111, a, b);
54011 let e = _mm_set_ps(7., 7., 1., 1.);
54012 assert_eq_m128(r, e);
54013 }
54014
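    // shuffle_i32x4 (and shuffle_f32x4 below) selects whole 128-bit lanes: the low control
    // fields pick lanes of `a` for the lower half of the result, the high fields pick lanes
    // of `b` for the upper half.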
54015 #[simd_test(enable = "avx512f")]
54016 unsafe fn test_mm512_shuffle_i32x4() {
54017 let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
54018 let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15);
54019 let r = _mm512_shuffle_i32x4::<0b00_00_00_00>(a, b);
54020 let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 2, 3, 6, 7, 2, 3, 6, 7);
54021 assert_eq_m512i(r, e);
54022 }
54023
54024 #[simd_test(enable = "avx512f")]
54025 unsafe fn test_mm512_mask_shuffle_i32x4() {
54026 let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
54027 let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15);
54028 let r = _mm512_mask_shuffle_i32x4::<0b00_00_00_00>(a, 0, a, b);
54029 assert_eq_m512i(r, a);
54030 let r = _mm512_mask_shuffle_i32x4::<0b00_00_00_00>(a, 0b11111111_11111111, a, b);
54031 let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 2, 3, 6, 7, 2, 3, 6, 7);
54032 assert_eq_m512i(r, e);
54033 }
54034
54035 #[simd_test(enable = "avx512f")]
54036 unsafe fn test_mm512_maskz_shuffle_i32x4() {
54037 let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
54038 let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15);
54039 let r = _mm512_maskz_shuffle_i32x4::<0b00_00_00_00>(0, a, b);
54040 assert_eq_m512i(r, _mm512_setzero_si512());
54041 let r = _mm512_maskz_shuffle_i32x4::<0b00_00_00_00>(0b00000000_11111111, a, b);
54042 let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 0, 0, 0, 0, 0, 0, 0, 0);
54043 assert_eq_m512i(r, e);
54044 }
54045
54046 #[simd_test(enable = "avx512f,avx512vl")]
54047 unsafe fn test_mm256_shuffle_i32x4() {
54048 let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
54049 let b = _mm256_set_epi32(2, 3, 6, 7, 10, 11, 14, 15);
54050 let r = _mm256_shuffle_i32x4::<0b00>(a, b);
54051 let e = _mm256_set_epi32(10, 11, 14, 15, 9, 12, 13, 16);
54052 assert_eq_m256i(r, e);
54053 }
54054
54055 #[simd_test(enable = "avx512f,avx512vl")]
54056 unsafe fn test_mm256_mask_shuffle_i32x4() {
54057 let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
54058 let b = _mm256_set_epi32(2, 3, 6, 7, 10, 11, 14, 15);
54059 let r = _mm256_mask_shuffle_i32x4::<0b00>(a, 0, a, b);
54060 assert_eq_m256i(r, a);
54061 let r = _mm256_mask_shuffle_i32x4::<0b00>(a, 0b11111111, a, b);
54062 let e = _mm256_set_epi32(10, 11, 14, 15, 9, 12, 13, 16);
54063 assert_eq_m256i(r, e);
54064 }
54065
54066 #[simd_test(enable = "avx512f,avx512vl")]
54067 unsafe fn test_mm256_maskz_shuffle_i32x4() {
54068 let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
54069 let b = _mm256_set_epi32(2, 3, 6, 7, 10, 11, 14, 15);
54070 let r = _mm256_maskz_shuffle_i32x4::<0b00>(0, a, b);
54071 assert_eq_m256i(r, _mm256_setzero_si256());
54072 let r = _mm256_maskz_shuffle_i32x4::<0b00>(0b11111111, a, b);
54073 let e = _mm256_set_epi32(10, 11, 14, 15, 9, 12, 13, 16);
54074 assert_eq_m256i(r, e);
54075 }
54076
54077 #[simd_test(enable = "avx512f")]
54078 unsafe fn test_mm512_shuffle_f32x4() {
54079 let a = _mm512_setr_ps(
54080 1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
54081 );
54082 let b = _mm512_setr_ps(
54083 2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
54084 );
54085 let r = _mm512_shuffle_f32x4::<0b00_00_00_00>(a, b);
54086 let e = _mm512_setr_ps(
54087 1., 4., 5., 8., 1., 4., 5., 8., 2., 3., 6., 7., 2., 3., 6., 7.,
54088 );
54089 assert_eq_m512(r, e);
54090 }
54091
54092 #[simd_test(enable = "avx512f")]
54093 unsafe fn test_mm512_mask_shuffle_f32x4() {
54094 let a = _mm512_setr_ps(
54095 1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
54096 );
54097 let b = _mm512_setr_ps(
54098 2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
54099 );
54100 let r = _mm512_mask_shuffle_f32x4::<0b00_00_00_00>(a, 0, a, b);
54101 assert_eq_m512(r, a);
54102 let r = _mm512_mask_shuffle_f32x4::<0b00_00_00_00>(a, 0b11111111_11111111, a, b);
54103 let e = _mm512_setr_ps(
54104 1., 4., 5., 8., 1., 4., 5., 8., 2., 3., 6., 7., 2., 3., 6., 7.,
54105 );
54106 assert_eq_m512(r, e);
54107 }
54108
54109 #[simd_test(enable = "avx512f")]
54110 unsafe fn test_mm512_maskz_shuffle_f32x4() {
54111 let a = _mm512_setr_ps(
54112 1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
54113 );
54114 let b = _mm512_setr_ps(
54115 2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
54116 );
54117 let r = _mm512_maskz_shuffle_f32x4::<0b00_00_00_00>(0, a, b);
54118 assert_eq_m512(r, _mm512_setzero_ps());
54119 let r = _mm512_maskz_shuffle_f32x4::<0b00_00_00_00>(0b00000000_11111111, a, b);
54120 let e = _mm512_setr_ps(
54121 1., 4., 5., 8., 1., 4., 5., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
54122 );
54123 assert_eq_m512(r, e);
54124 }
54125
54126 #[simd_test(enable = "avx512f,avx512vl")]
54127 unsafe fn test_mm256_shuffle_f32x4() {
54128 let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
54129 let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
54130 let r = _mm256_shuffle_f32x4::<0b00>(a, b);
54131 let e = _mm256_set_ps(10., 11., 14., 15., 9., 12., 13., 16.);
54132 assert_eq_m256(r, e);
54133 }
54134
54135 #[simd_test(enable = "avx512f,avx512vl")]
54136 unsafe fn test_mm256_mask_shuffle_f32x4() {
54137 let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
54138 let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
54139 let r = _mm256_mask_shuffle_f32x4::<0b00>(a, 0, a, b);
54140 assert_eq_m256(r, a);
54141 let r = _mm256_mask_shuffle_f32x4::<0b00>(a, 0b11111111, a, b);
54142 let e = _mm256_set_ps(10., 11., 14., 15., 9., 12., 13., 16.);
54143 assert_eq_m256(r, e);
54144 }
54145
54146 #[simd_test(enable = "avx512f,avx512vl")]
54147 unsafe fn test_mm256_maskz_shuffle_f32x4() {
54148 let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
54149 let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
54150 let r = _mm256_maskz_shuffle_f32x4::<0b00>(0, a, b);
54151 assert_eq_m256(r, _mm256_setzero_ps());
54152 let r = _mm256_maskz_shuffle_f32x4::<0b00>(0b11111111, a, b);
54153 let e = _mm256_set_ps(10., 11., 14., 15., 9., 12., 13., 16.);
54154 assert_eq_m256(r, e);
54155 }
54156
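    // The const index selects which 128-bit lane to extract; index 1 is elements 4..=7.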
54157 #[simd_test(enable = "avx512f")]
54158 unsafe fn test_mm512_extractf32x4_ps() {
54159 let a = _mm512_setr_ps(
54160 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54161 );
54162 let r = _mm512_extractf32x4_ps::<1>(a);
54163 let e = _mm_setr_ps(5., 6., 7., 8.);
54164 assert_eq_m128(r, e);
54165 }
54166
54167 #[simd_test(enable = "avx512f")]
54168 unsafe fn test_mm512_mask_extractf32x4_ps() {
54169 let a = _mm512_setr_ps(
54170 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54171 );
54172 let src = _mm_set1_ps(100.);
54173 let r = _mm512_mask_extractf32x4_ps::<1>(src, 0, a);
54174 assert_eq_m128(r, src);
54175 let r = _mm512_mask_extractf32x4_ps::<1>(src, 0b11111111, a);
54176 let e = _mm_setr_ps(5., 6., 7., 8.);
54177 assert_eq_m128(r, e);
54178 }
54179
54180 #[simd_test(enable = "avx512f")]
54181 unsafe fn test_mm512_maskz_extractf32x4_ps() {
54182 let a = _mm512_setr_ps(
54183 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54184 );
54185 let r = _mm512_maskz_extractf32x4_ps::<1>(0, a);
54186 assert_eq_m128(r, _mm_setzero_ps());
54187 let r = _mm512_maskz_extractf32x4_ps::<1>(0b00000001, a);
54188 let e = _mm_setr_ps(5., 0., 0., 0.);
54189 assert_eq_m128(r, e);
54190 }
54191
54192 #[simd_test(enable = "avx512f,avx512vl")]
54193 unsafe fn test_mm256_extractf32x4_ps() {
54194 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54195 let r = _mm256_extractf32x4_ps::<1>(a);
54196 let e = _mm_set_ps(1., 2., 3., 4.);
54197 assert_eq_m128(r, e);
54198 }
54199
54200 #[simd_test(enable = "avx512f,avx512vl")]
54201 unsafe fn test_mm256_mask_extractf32x4_ps() {
54202 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54203 let src = _mm_set1_ps(100.);
54204 let r = _mm256_mask_extractf32x4_ps::<1>(src, 0, a);
54205 assert_eq_m128(r, src);
54206 let r = _mm256_mask_extractf32x4_ps::<1>(src, 0b00001111, a);
54207 let e = _mm_set_ps(1., 2., 3., 4.);
54208 assert_eq_m128(r, e);
54209 }
54210
54211 #[simd_test(enable = "avx512f,avx512vl")]
54212 unsafe fn test_mm256_maskz_extractf32x4_ps() {
54213 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54214 let r = _mm256_maskz_extractf32x4_ps::<1>(0, a);
54215 assert_eq_m128(r, _mm_setzero_ps());
54216 let r = _mm256_maskz_extractf32x4_ps::<1>(0b00001111, a);
54217 let e = _mm_set_ps(1., 2., 3., 4.);
54218 assert_eq_m128(r, e);
54219 }
54220
54221 #[simd_test(enable = "avx512f")]
54222 unsafe fn test_mm512_extracti32x4_epi32() {
54223 let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54224 let r = _mm512_extracti32x4_epi32::<1>(a);
54225 let e = _mm_setr_epi32(5, 6, 7, 8);
54226 assert_eq_m128i(r, e);
54227 }
54228
54229 #[simd_test(enable = "avx512f")]
54230 unsafe fn test_mm512_mask_extracti32x4_epi32() {
54231 let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54232 let src = _mm_set1_epi32(100);
54233 let r = _mm512_mask_extracti32x4_epi32::<1>(src, 0, a);
54234 assert_eq_m128i(r, src);
54235 let r = _mm512_mask_extracti32x4_epi32::<1>(src, 0b11111111, a);
54236 let e = _mm_setr_epi32(5, 6, 7, 8);
54237 assert_eq_m128i(r, e);
54238 }
54239
    #[simd_test(enable = "avx512f")]
54241 unsafe fn test_mm512_maskz_extracti32x4_epi32() {
54242 let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54243 let r = _mm512_maskz_extracti32x4_epi32::<1>(0, a);
54244 assert_eq_m128i(r, _mm_setzero_si128());
54245 let r = _mm512_maskz_extracti32x4_epi32::<1>(0b00000001, a);
54246 let e = _mm_setr_epi32(5, 0, 0, 0);
54247 assert_eq_m128i(r, e);
54248 }
54249
54250 #[simd_test(enable = "avx512f,avx512vl")]
54251 unsafe fn test_mm256_extracti32x4_epi32() {
54252 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
54253 let r = _mm256_extracti32x4_epi32::<1>(a);
54254 let e = _mm_set_epi32(1, 2, 3, 4);
54255 assert_eq_m128i(r, e);
54256 }
54257
54258 #[simd_test(enable = "avx512f,avx512vl")]
54259 unsafe fn test_mm256_mask_extracti32x4_epi32() {
54260 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
54261 let src = _mm_set1_epi32(100);
54262 let r = _mm256_mask_extracti32x4_epi32::<1>(src, 0, a);
54263 assert_eq_m128i(r, src);
54264 let r = _mm256_mask_extracti32x4_epi32::<1>(src, 0b00001111, a);
54265 let e = _mm_set_epi32(1, 2, 3, 4);
54266 assert_eq_m128i(r, e);
54267 }
54268
54269 #[simd_test(enable = "avx512f,avx512vl")]
54270 unsafe fn test_mm256_maskz_extracti32x4_epi32() {
54271 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
54272 let r = _mm256_maskz_extracti32x4_epi32::<1>(0, a);
54273 assert_eq_m128i(r, _mm_setzero_si128());
54274 let r = _mm256_maskz_extracti32x4_epi32::<1>(0b00001111, a);
54275 let e = _mm_set_epi32(1, 2, 3, 4);
54276 assert_eq_m128i(r, e);
54277 }
54278
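    // moveldup duplicates the even-indexed elements into the odd positions; movehdup
    // (further below) duplicates the odd-indexed elements into the even positions.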
54279 #[simd_test(enable = "avx512f")]
54280 unsafe fn test_mm512_moveldup_ps() {
54281 let a = _mm512_setr_ps(
54282 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54283 );
54284 let r = _mm512_moveldup_ps(a);
54285 let e = _mm512_setr_ps(
54286 1., 1., 3., 3., 5., 5., 7., 7., 9., 9., 11., 11., 13., 13., 15., 15.,
54287 );
54288 assert_eq_m512(r, e);
54289 }
54290
54291 #[simd_test(enable = "avx512f")]
54292 unsafe fn test_mm512_mask_moveldup_ps() {
54293 let a = _mm512_setr_ps(
54294 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54295 );
54296 let r = _mm512_mask_moveldup_ps(a, 0, a);
54297 assert_eq_m512(r, a);
54298 let r = _mm512_mask_moveldup_ps(a, 0b11111111_11111111, a);
54299 let e = _mm512_setr_ps(
54300 1., 1., 3., 3., 5., 5., 7., 7., 9., 9., 11., 11., 13., 13., 15., 15.,
54301 );
54302 assert_eq_m512(r, e);
54303 }
54304
54305 #[simd_test(enable = "avx512f")]
54306 unsafe fn test_mm512_maskz_moveldup_ps() {
54307 let a = _mm512_setr_ps(
54308 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54309 );
54310 let r = _mm512_maskz_moveldup_ps(0, a);
54311 assert_eq_m512(r, _mm512_setzero_ps());
54312 let r = _mm512_maskz_moveldup_ps(0b00000000_11111111, a);
54313 let e = _mm512_setr_ps(
54314 1., 1., 3., 3., 5., 5., 7., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
54315 );
54316 assert_eq_m512(r, e);
54317 }
54318
54319 #[simd_test(enable = "avx512f,avx512vl")]
54320 unsafe fn test_mm256_mask_moveldup_ps() {
54321 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54322 let r = _mm256_mask_moveldup_ps(a, 0, a);
54323 assert_eq_m256(r, a);
54324 let r = _mm256_mask_moveldup_ps(a, 0b11111111, a);
54325 let e = _mm256_set_ps(2., 2., 4., 4., 6., 6., 8., 8.);
54326 assert_eq_m256(r, e);
54327 }
54328
54329 #[simd_test(enable = "avx512f,avx512vl")]
54330 unsafe fn test_mm256_maskz_moveldup_ps() {
54331 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54332 let r = _mm256_maskz_moveldup_ps(0, a);
54333 assert_eq_m256(r, _mm256_setzero_ps());
54334 let r = _mm256_maskz_moveldup_ps(0b11111111, a);
54335 let e = _mm256_set_ps(2., 2., 4., 4., 6., 6., 8., 8.);
54336 assert_eq_m256(r, e);
54337 }
54338
54339 #[simd_test(enable = "avx512f,avx512vl")]
54340 unsafe fn test_mm_mask_moveldup_ps() {
54341 let a = _mm_set_ps(1., 2., 3., 4.);
54342 let r = _mm_mask_moveldup_ps(a, 0, a);
54343 assert_eq_m128(r, a);
54344 let r = _mm_mask_moveldup_ps(a, 0b00001111, a);
54345 let e = _mm_set_ps(2., 2., 4., 4.);
54346 assert_eq_m128(r, e);
54347 }
54348
54349 #[simd_test(enable = "avx512f,avx512vl")]
54350 unsafe fn test_mm_maskz_moveldup_ps() {
54351 let a = _mm_set_ps(1., 2., 3., 4.);
54352 let r = _mm_maskz_moveldup_ps(0, a);
54353 assert_eq_m128(r, _mm_setzero_ps());
54354 let r = _mm_maskz_moveldup_ps(0b00001111, a);
54355 let e = _mm_set_ps(2., 2., 4., 4.);
54356 assert_eq_m128(r, e);
54357 }
54358
54359 #[simd_test(enable = "avx512f")]
54360 unsafe fn test_mm512_movehdup_ps() {
54361 let a = _mm512_setr_ps(
54362 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54363 );
54364 let r = _mm512_movehdup_ps(a);
54365 let e = _mm512_setr_ps(
54366 2., 2., 4., 4., 6., 6., 8., 8., 10., 10., 12., 12., 14., 14., 16., 16.,
54367 );
54368 assert_eq_m512(r, e);
54369 }
54370
54371 #[simd_test(enable = "avx512f")]
54372 unsafe fn test_mm512_mask_movehdup_ps() {
54373 let a = _mm512_setr_ps(
54374 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54375 );
54376 let r = _mm512_mask_movehdup_ps(a, 0, a);
54377 assert_eq_m512(r, a);
54378 let r = _mm512_mask_movehdup_ps(a, 0b11111111_11111111, a);
54379 let e = _mm512_setr_ps(
54380 2., 2., 4., 4., 6., 6., 8., 8., 10., 10., 12., 12., 14., 14., 16., 16.,
54381 );
54382 assert_eq_m512(r, e);
54383 }
54384
54385 #[simd_test(enable = "avx512f")]
54386 unsafe fn test_mm512_maskz_movehdup_ps() {
54387 let a = _mm512_setr_ps(
54388 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54389 );
54390 let r = _mm512_maskz_movehdup_ps(0, a);
54391 assert_eq_m512(r, _mm512_setzero_ps());
54392 let r = _mm512_maskz_movehdup_ps(0b00000000_11111111, a);
54393 let e = _mm512_setr_ps(
54394 2., 2., 4., 4., 6., 6., 8., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
54395 );
54396 assert_eq_m512(r, e);
54397 }
54398
54399 #[simd_test(enable = "avx512f,avx512vl")]
54400 unsafe fn test_mm256_mask_movehdup_ps() {
54401 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54402 let r = _mm256_mask_movehdup_ps(a, 0, a);
54403 assert_eq_m256(r, a);
54404 let r = _mm256_mask_movehdup_ps(a, 0b11111111, a);
54405 let e = _mm256_set_ps(1., 1., 3., 3., 5., 5., 7., 7.);
54406 assert_eq_m256(r, e);
54407 }
54408
54409 #[simd_test(enable = "avx512f,avx512vl")]
54410 unsafe fn test_mm256_maskz_movehdup_ps() {
54411 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54412 let r = _mm256_maskz_movehdup_ps(0, a);
54413 assert_eq_m256(r, _mm256_setzero_ps());
54414 let r = _mm256_maskz_movehdup_ps(0b11111111, a);
54415 let e = _mm256_set_ps(1., 1., 3., 3., 5., 5., 7., 7.);
54416 assert_eq_m256(r, e);
54417 }
54418
54419 #[simd_test(enable = "avx512f,avx512vl")]
54420 unsafe fn test_mm_mask_movehdup_ps() {
54421 let a = _mm_set_ps(1., 2., 3., 4.);
54422 let r = _mm_mask_movehdup_ps(a, 0, a);
54423 assert_eq_m128(r, a);
54424 let r = _mm_mask_movehdup_ps(a, 0b00001111, a);
54425 let e = _mm_set_ps(1., 1., 3., 3.);
54426 assert_eq_m128(r, e);
54427 }
54428
54429 #[simd_test(enable = "avx512f,avx512vl")]
54430 unsafe fn test_mm_maskz_movehdup_ps() {
54431 let a = _mm_set_ps(1., 2., 3., 4.);
54432 let r = _mm_maskz_movehdup_ps(0, a);
54433 assert_eq_m128(r, _mm_setzero_ps());
54434 let r = _mm_maskz_movehdup_ps(0b00001111, a);
54435 let e = _mm_set_ps(1., 1., 3., 3.);
54436 assert_eq_m128(r, e);
54437 }
54438
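    // inserti32x4/insertf32x4 overwrite the 128-bit lane selected by the const index with `b`.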
54439 #[simd_test(enable = "avx512f")]
54440 unsafe fn test_mm512_inserti32x4() {
54441 let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54442 let b = _mm_setr_epi32(17, 18, 19, 20);
54443 let r = _mm512_inserti32x4::<0>(a, b);
54444 let e = _mm512_setr_epi32(17, 18, 19, 20, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54445 assert_eq_m512i(r, e);
54446 }
54447
54448 #[simd_test(enable = "avx512f")]
54449 unsafe fn test_mm512_mask_inserti32x4() {
54450 let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54451 let b = _mm_setr_epi32(17, 18, 19, 20);
54452 let r = _mm512_mask_inserti32x4::<0>(a, 0, a, b);
54453 assert_eq_m512i(r, a);
54454 let r = _mm512_mask_inserti32x4::<0>(a, 0b11111111_11111111, a, b);
54455 let e = _mm512_setr_epi32(17, 18, 19, 20, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54456 assert_eq_m512i(r, e);
54457 }
54458
54459 #[simd_test(enable = "avx512f")]
54460 unsafe fn test_mm512_maskz_inserti32x4() {
54461 let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54462 let b = _mm_setr_epi32(17, 18, 19, 20);
54463 let r = _mm512_maskz_inserti32x4::<0>(0, a, b);
54464 assert_eq_m512i(r, _mm512_setzero_si512());
54465 let r = _mm512_maskz_inserti32x4::<0>(0b00000000_11111111, a, b);
54466 let e = _mm512_setr_epi32(17, 18, 19, 20, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0);
54467 assert_eq_m512i(r, e);
54468 }
54469
54470 #[simd_test(enable = "avx512f,avx512vl")]
54471 unsafe fn test_mm256_inserti32x4() {
54472 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
54473 let b = _mm_set_epi32(17, 18, 19, 20);
54474 let r = _mm256_inserti32x4::<1>(a, b);
54475 let e = _mm256_set_epi32(17, 18, 19, 20, 5, 6, 7, 8);
54476 assert_eq_m256i(r, e);
54477 }
54478
54479 #[simd_test(enable = "avx512f,avx512vl")]
54480 unsafe fn test_mm256_mask_inserti32x4() {
54481 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
54482 let b = _mm_set_epi32(17, 18, 19, 20);
54483 let r = _mm256_mask_inserti32x4::<0>(a, 0, a, b);
54484 assert_eq_m256i(r, a);
54485 let r = _mm256_mask_inserti32x4::<1>(a, 0b11111111, a, b);
54486 let e = _mm256_set_epi32(17, 18, 19, 20, 5, 6, 7, 8);
54487 assert_eq_m256i(r, e);
54488 }
54489
54490 #[simd_test(enable = "avx512f,avx512vl")]
54491 unsafe fn test_mm256_maskz_inserti32x4() {
54492 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
54493 let b = _mm_set_epi32(17, 18, 19, 20);
54494 let r = _mm256_maskz_inserti32x4::<0>(0, a, b);
54495 assert_eq_m256i(r, _mm256_setzero_si256());
54496 let r = _mm256_maskz_inserti32x4::<1>(0b11111111, a, b);
54497 let e = _mm256_set_epi32(17, 18, 19, 20, 5, 6, 7, 8);
54498 assert_eq_m256i(r, e);
54499 }
54500
54501 #[simd_test(enable = "avx512f")]
54502 unsafe fn test_mm512_insertf32x4() {
54503 let a = _mm512_setr_ps(
54504 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54505 );
54506 let b = _mm_setr_ps(17., 18., 19., 20.);
54507 let r = _mm512_insertf32x4::<0>(a, b);
54508 let e = _mm512_setr_ps(
54509 17., 18., 19., 20., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54510 );
54511 assert_eq_m512(r, e);
54512 }
54513
54514 #[simd_test(enable = "avx512f")]
54515 unsafe fn test_mm512_mask_insertf32x4() {
54516 let a = _mm512_setr_ps(
54517 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54518 );
54519 let b = _mm_setr_ps(17., 18., 19., 20.);
54520 let r = _mm512_mask_insertf32x4::<0>(a, 0, a, b);
54521 assert_eq_m512(r, a);
54522 let r = _mm512_mask_insertf32x4::<0>(a, 0b11111111_11111111, a, b);
54523 let e = _mm512_setr_ps(
54524 17., 18., 19., 20., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54525 );
54526 assert_eq_m512(r, e);
54527 }
54528
54529 #[simd_test(enable = "avx512f")]
54530 unsafe fn test_mm512_maskz_insertf32x4() {
54531 let a = _mm512_setr_ps(
54532 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54533 );
54534 let b = _mm_setr_ps(17., 18., 19., 20.);
54535 let r = _mm512_maskz_insertf32x4::<0>(0, a, b);
54536 assert_eq_m512(r, _mm512_setzero_ps());
54537 let r = _mm512_maskz_insertf32x4::<0>(0b00000000_11111111, a, b);
54538 let e = _mm512_setr_ps(
54539 17., 18., 19., 20., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
54540 );
54541 assert_eq_m512(r, e);
54542 }
54543
54544 #[simd_test(enable = "avx512f,avx512vl")]
54545 unsafe fn test_mm256_insertf32x4() {
54546 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54547 let b = _mm_set_ps(17., 18., 19., 20.);
54548 let r = _mm256_insertf32x4::<1>(a, b);
54549 let e = _mm256_set_ps(17., 18., 19., 20., 5., 6., 7., 8.);
54550 assert_eq_m256(r, e);
54551 }
54552
54553 #[simd_test(enable = "avx512f,avx512vl")]
54554 unsafe fn test_mm256_mask_insertf32x4() {
54555 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54556 let b = _mm_set_ps(17., 18., 19., 20.);
54557 let r = _mm256_mask_insertf32x4::<0>(a, 0, a, b);
54558 assert_eq_m256(r, a);
54559 let r = _mm256_mask_insertf32x4::<1>(a, 0b11111111, a, b);
54560 let e = _mm256_set_ps(17., 18., 19., 20., 5., 6., 7., 8.);
54561 assert_eq_m256(r, e);
54562 }
54563
54564 #[simd_test(enable = "avx512f,avx512vl")]
54565 unsafe fn test_mm256_maskz_insertf32x4() {
54566 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54567 let b = _mm_set_ps(17., 18., 19., 20.);
54568 let r = _mm256_maskz_insertf32x4::<0>(0, a, b);
54569 assert_eq_m256(r, _mm256_setzero_ps());
54570 let r = _mm256_maskz_insertf32x4::<1>(0b11111111, a, b);
54571 let e = _mm256_set_ps(17., 18., 19., 20., 5., 6., 7., 8.);
54572 assert_eq_m256(r, e);
54573 }
54574
54575 #[simd_test(enable = "avx512f")]
54576 unsafe fn test_mm512_castps128_ps512() {
54577 let a = _mm_setr_ps(17., 18., 19., 20.);
54578 let r = _mm512_castps128_ps512(a);
54579 assert_eq_m128(_mm512_castps512_ps128(r), a);
54580 }
54581
54582 #[simd_test(enable = "avx512f")]
54583 unsafe fn test_mm512_castps256_ps512() {
54584 let a = _mm256_setr_ps(17., 18., 19., 20., 21., 22., 23., 24.);
54585 let r = _mm512_castps256_ps512(a);
54586 assert_eq_m256(_mm512_castps512_ps256(r), a);
54587 }
54588
54589 #[simd_test(enable = "avx512f")]
54590 unsafe fn test_mm512_zextps128_ps512() {
54591 let a = _mm_setr_ps(17., 18., 19., 20.);
54592 let r = _mm512_zextps128_ps512(a);
54593 let e = _mm512_setr_ps(
54594 17., 18., 19., 20., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
54595 );
54596 assert_eq_m512(r, e);
54597 }
54598
54599 #[simd_test(enable = "avx512f")]
54600 unsafe fn test_mm512_zextps256_ps512() {
54601 let a = _mm256_setr_ps(17., 18., 19., 20., 21., 22., 23., 24.);
54602 let r = _mm512_zextps256_ps512(a);
54603 let e = _mm512_setr_ps(
54604 17., 18., 19., 20., 21., 22., 23., 24., 0., 0., 0., 0., 0., 0., 0., 0.,
54605 );
54606 assert_eq_m512(r, e);
54607 }
54608
54609 #[simd_test(enable = "avx512f")]
54610 unsafe fn test_mm512_castps512_ps128() {
54611 let a = _mm512_setr_ps(
54612 17., 18., 19., 20., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
54613 );
54614 let r = _mm512_castps512_ps128(a);
54615 let e = _mm_setr_ps(17., 18., 19., 20.);
54616 assert_eq_m128(r, e);
54617 }
54618
54619 #[simd_test(enable = "avx512f")]
54620 unsafe fn test_mm512_castps512_ps256() {
54621 let a = _mm512_setr_ps(
54622 17., 18., 19., 20., 21., 22., 23., 24., -1., -1., -1., -1., -1., -1., -1., -1.,
54623 );
54624 let r = _mm512_castps512_ps256(a);
54625 let e = _mm256_setr_ps(17., 18., 19., 20., 21., 22., 23., 24.);
54626 assert_eq_m256(r, e);
54627 }
54628
54629 #[simd_test(enable = "avx512f")]
54630 unsafe fn test_mm512_castps_pd() {
54631 let a = _mm512_set1_ps(1.);
54632 let r = _mm512_castps_pd(a);
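        // Two adjacent 1.0f32 bit patterns (0x3F80_0000) reinterpret as the f64
        // 0x3F80_0000_3F80_0000, approximately 0.007812501848093234.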
54633 let e = _mm512_set1_pd(0.007812501848093234);
54634 assert_eq_m512d(r, e);
54635 }
54636
54637 #[simd_test(enable = "avx512f")]
54638 unsafe fn test_mm512_castps_si512() {
54639 let a = _mm512_set1_ps(1.);
54640 let r = _mm512_castps_si512(a);
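        // 1065353216 == 0x3F80_0000, the bit pattern of 1.0f32.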
54641 let e = _mm512_set1_epi32(1065353216);
54642 assert_eq_m512i(r, e);
54643 }
54644
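    // broadcastd/broadcastss replicate the lowest element of `a`; with set_epi32(17, 18, 19, 20)
    // that element is 20.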
54645 #[simd_test(enable = "avx512f")]
54646 unsafe fn test_mm512_broadcastd_epi32() {
54647 let a = _mm_set_epi32(17, 18, 19, 20);
54648 let r = _mm512_broadcastd_epi32(a);
54649 let e = _mm512_set1_epi32(20);
54650 assert_eq_m512i(r, e);
54651 }
54652
54653 #[simd_test(enable = "avx512f")]
54654 unsafe fn test_mm512_mask_broadcastd_epi32() {
54655 let src = _mm512_set1_epi32(20);
54656 let a = _mm_set_epi32(17, 18, 19, 20);
54657 let r = _mm512_mask_broadcastd_epi32(src, 0, a);
54658 assert_eq_m512i(r, src);
54659 let r = _mm512_mask_broadcastd_epi32(src, 0b11111111_11111111, a);
54660 let e = _mm512_set1_epi32(20);
54661 assert_eq_m512i(r, e);
54662 }
54663
54664 #[simd_test(enable = "avx512f")]
54665 unsafe fn test_mm512_maskz_broadcastd_epi32() {
54666 let a = _mm_set_epi32(17, 18, 19, 20);
54667 let r = _mm512_maskz_broadcastd_epi32(0, a);
54668 assert_eq_m512i(r, _mm512_setzero_si512());
54669 let r = _mm512_maskz_broadcastd_epi32(0b00000000_11111111, a);
54670 let e = _mm512_setr_epi32(20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 0, 0, 0, 0);
54671 assert_eq_m512i(r, e);
54672 }
54673
54674 #[simd_test(enable = "avx512f,avx512vl")]
54675 unsafe fn test_mm256_mask_broadcastd_epi32() {
54676 let src = _mm256_set1_epi32(20);
54677 let a = _mm_set_epi32(17, 18, 19, 20);
54678 let r = _mm256_mask_broadcastd_epi32(src, 0, a);
54679 assert_eq_m256i(r, src);
54680 let r = _mm256_mask_broadcastd_epi32(src, 0b11111111, a);
54681 let e = _mm256_set1_epi32(20);
54682 assert_eq_m256i(r, e);
54683 }
54684
54685 #[simd_test(enable = "avx512f,avx512vl")]
54686 unsafe fn test_mm256_maskz_broadcastd_epi32() {
54687 let a = _mm_set_epi32(17, 18, 19, 20);
54688 let r = _mm256_maskz_broadcastd_epi32(0, a);
54689 assert_eq_m256i(r, _mm256_setzero_si256());
54690 let r = _mm256_maskz_broadcastd_epi32(0b11111111, a);
54691 let e = _mm256_set1_epi32(20);
54692 assert_eq_m256i(r, e);
54693 }
54694
54695 #[simd_test(enable = "avx512f,avx512vl")]
54696 unsafe fn test_mm_mask_broadcastd_epi32() {
54697 let src = _mm_set1_epi32(20);
54698 let a = _mm_set_epi32(17, 18, 19, 20);
54699 let r = _mm_mask_broadcastd_epi32(src, 0, a);
54700 assert_eq_m128i(r, src);
54701 let r = _mm_mask_broadcastd_epi32(src, 0b00001111, a);
54702 let e = _mm_set1_epi32(20);
54703 assert_eq_m128i(r, e);
54704 }
54705
54706 #[simd_test(enable = "avx512f,avx512vl")]
54707 unsafe fn test_mm_maskz_broadcastd_epi32() {
54708 let a = _mm_set_epi32(17, 18, 19, 20);
54709 let r = _mm_maskz_broadcastd_epi32(0, a);
54710 assert_eq_m128i(r, _mm_setzero_si128());
54711 let r = _mm_maskz_broadcastd_epi32(0b00001111, a);
54712 let e = _mm_set1_epi32(20);
54713 assert_eq_m128i(r, e);
54714 }
54715
54716 #[simd_test(enable = "avx512f")]
54717 unsafe fn test_mm512_broadcastss_ps() {
54718 let a = _mm_set_ps(17., 18., 19., 20.);
54719 let r = _mm512_broadcastss_ps(a);
54720 let e = _mm512_set1_ps(20.);
54721 assert_eq_m512(r, e);
54722 }
54723
54724 #[simd_test(enable = "avx512f")]
54725 unsafe fn test_mm512_mask_broadcastss_ps() {
54726 let src = _mm512_set1_ps(20.);
54727 let a = _mm_set_ps(17., 18., 19., 20.);
54728 let r = _mm512_mask_broadcastss_ps(src, 0, a);
54729 assert_eq_m512(r, src);
54730 let r = _mm512_mask_broadcastss_ps(src, 0b11111111_11111111, a);
54731 let e = _mm512_set1_ps(20.);
54732 assert_eq_m512(r, e);
54733 }
54734
54735 #[simd_test(enable = "avx512f")]
54736 unsafe fn test_mm512_maskz_broadcastss_ps() {
54737 let a = _mm_set_ps(17., 18., 19., 20.);
54738 let r = _mm512_maskz_broadcastss_ps(0, a);
54739 assert_eq_m512(r, _mm512_setzero_ps());
54740 let r = _mm512_maskz_broadcastss_ps(0b00000000_11111111, a);
54741 let e = _mm512_setr_ps(
54742 20., 20., 20., 20., 20., 20., 20., 20., 0., 0., 0., 0., 0., 0., 0., 0.,
54743 );
54744 assert_eq_m512(r, e);
54745 }
54746
54747 #[simd_test(enable = "avx512f,avx512vl")]
54748 unsafe fn test_mm256_mask_broadcastss_ps() {
54749 let src = _mm256_set1_ps(20.);
54750 let a = _mm_set_ps(17., 18., 19., 20.);
54751 let r = _mm256_mask_broadcastss_ps(src, 0, a);
54752 assert_eq_m256(r, src);
54753 let r = _mm256_mask_broadcastss_ps(src, 0b11111111, a);
54754 let e = _mm256_set1_ps(20.);
54755 assert_eq_m256(r, e);
54756 }
54757
54758 #[simd_test(enable = "avx512f,avx512vl")]
54759 unsafe fn test_mm256_maskz_broadcastss_ps() {
54760 let a = _mm_set_ps(17., 18., 19., 20.);
54761 let r = _mm256_maskz_broadcastss_ps(0, a);
54762 assert_eq_m256(r, _mm256_setzero_ps());
54763 let r = _mm256_maskz_broadcastss_ps(0b11111111, a);
54764 let e = _mm256_set1_ps(20.);
54765 assert_eq_m256(r, e);
54766 }
54767
54768 #[simd_test(enable = "avx512f,avx512vl")]
54769 unsafe fn test_mm_mask_broadcastss_ps() {
54770 let src = _mm_set1_ps(20.);
54771 let a = _mm_set_ps(17., 18., 19., 20.);
54772 let r = _mm_mask_broadcastss_ps(src, 0, a);
54773 assert_eq_m128(r, src);
54774 let r = _mm_mask_broadcastss_ps(src, 0b00001111, a);
54775 let e = _mm_set1_ps(20.);
54776 assert_eq_m128(r, e);
54777 }
54778
54779 #[simd_test(enable = "avx512f,avx512vl")]
54780 unsafe fn test_mm_maskz_broadcastss_ps() {
54781 let a = _mm_set_ps(17., 18., 19., 20.);
54782 let r = _mm_maskz_broadcastss_ps(0, a);
54783 assert_eq_m128(r, _mm_setzero_ps());
54784 let r = _mm_maskz_broadcastss_ps(0b00001111, a);
54785 let e = _mm_set1_ps(20.);
54786 assert_eq_m128(r, e);
54787 }
54788
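    // broadcast_i32x4/broadcast_f32x4 repeat the whole 128-bit source into every lane of the result.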
54789 #[simd_test(enable = "avx512f")]
54790 unsafe fn test_mm512_broadcast_i32x4() {
54791 let a = _mm_set_epi32(17, 18, 19, 20);
54792 let r = _mm512_broadcast_i32x4(a);
54793 let e = _mm512_set_epi32(
54794 17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20,
54795 );
54796 assert_eq_m512i(r, e);
54797 }
54798
54799 #[simd_test(enable = "avx512f")]
54800 unsafe fn test_mm512_mask_broadcast_i32x4() {
54801 let src = _mm512_set1_epi32(20);
54802 let a = _mm_set_epi32(17, 18, 19, 20);
54803 let r = _mm512_mask_broadcast_i32x4(src, 0, a);
54804 assert_eq_m512i(r, src);
54805 let r = _mm512_mask_broadcast_i32x4(src, 0b11111111_11111111, a);
54806 let e = _mm512_set_epi32(
54807 17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20,
54808 );
54809 assert_eq_m512i(r, e);
54810 }
54811
54812 #[simd_test(enable = "avx512f")]
54813 unsafe fn test_mm512_maskz_broadcast_i32x4() {
54814 let a = _mm_set_epi32(17, 18, 19, 20);
54815 let r = _mm512_maskz_broadcast_i32x4(0, a);
54816 assert_eq_m512i(r, _mm512_setzero_si512());
54817 let r = _mm512_maskz_broadcast_i32x4(0b00000000_11111111, a);
54818 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 17, 18, 19, 20, 17, 18, 19, 20);
54819 assert_eq_m512i(r, e);
54820 }
54821
54822 #[simd_test(enable = "avx512f,avx512vl")]
54823 unsafe fn test_mm256_broadcast_i32x4() {
54824 let a = _mm_set_epi32(17, 18, 19, 20);
54825 let r = _mm256_broadcast_i32x4(a);
54826 let e = _mm256_set_epi32(17, 18, 19, 20, 17, 18, 19, 20);
54827 assert_eq_m256i(r, e);
54828 }
54829
54830 #[simd_test(enable = "avx512f,avx512vl")]
54831 unsafe fn test_mm256_mask_broadcast_i32x4() {
54832 let src = _mm256_set1_epi32(20);
54833 let a = _mm_set_epi32(17, 18, 19, 20);
54834 let r = _mm256_mask_broadcast_i32x4(src, 0, a);
54835 assert_eq_m256i(r, src);
54836 let r = _mm256_mask_broadcast_i32x4(src, 0b11111111, a);
54837 let e = _mm256_set_epi32(17, 18, 19, 20, 17, 18, 19, 20);
54838 assert_eq_m256i(r, e);
54839 }
54840
54841 #[simd_test(enable = "avx512f,avx512vl")]
54842 unsafe fn test_mm256_maskz_broadcast_i32x4() {
54843 let a = _mm_set_epi32(17, 18, 19, 20);
54844 let r = _mm256_maskz_broadcast_i32x4(0, a);
54845 assert_eq_m256i(r, _mm256_setzero_si256());
54846 let r = _mm256_maskz_broadcast_i32x4(0b11111111, a);
54847 let e = _mm256_set_epi32(17, 18, 19, 20, 17, 18, 19, 20);
54848 assert_eq_m256i(r, e);
54849 }
54850
54851 #[simd_test(enable = "avx512f")]
54852 unsafe fn test_mm512_broadcast_f32x4() {
54853 let a = _mm_set_ps(17., 18., 19., 20.);
54854 let r = _mm512_broadcast_f32x4(a);
54855 let e = _mm512_set_ps(
54856 17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20.,
54857 );
54858 assert_eq_m512(r, e);
54859 }
54860
54861 #[simd_test(enable = "avx512f")]
54862 unsafe fn test_mm512_mask_broadcast_f32x4() {
54863 let src = _mm512_set1_ps(20.);
54864 let a = _mm_set_ps(17., 18., 19., 20.);
54865 let r = _mm512_mask_broadcast_f32x4(src, 0, a);
54866 assert_eq_m512(r, src);
54867 let r = _mm512_mask_broadcast_f32x4(src, 0b11111111_11111111, a);
54868 let e = _mm512_set_ps(
54869 17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20.,
54870 );
54871 assert_eq_m512(r, e);
54872 }
54873
54874 #[simd_test(enable = "avx512f")]
54875 unsafe fn test_mm512_maskz_broadcast_f32x4() {
54876 let a = _mm_set_ps(17., 18., 19., 20.);
54877 let r = _mm512_maskz_broadcast_f32x4(0, a);
54878 assert_eq_m512(r, _mm512_setzero_ps());
54879 let r = _mm512_maskz_broadcast_f32x4(0b00000000_11111111, a);
54880 let e = _mm512_set_ps(
54881 0., 0., 0., 0., 0., 0., 0., 0., 17., 18., 19., 20., 17., 18., 19., 20.,
54882 );
54883 assert_eq_m512(r, e);
54884 }
54885
54886 #[simd_test(enable = "avx512f,avx512vl")]
54887 unsafe fn test_mm256_broadcast_f32x4() {
54888 let a = _mm_set_ps(17., 18., 19., 20.);
54889 let r = _mm256_broadcast_f32x4(a);
54890 let e = _mm256_set_ps(17., 18., 19., 20., 17., 18., 19., 20.);
54891 assert_eq_m256(r, e);
54892 }
54893
54894 #[simd_test(enable = "avx512f,avx512vl")]
54895 unsafe fn test_mm256_mask_broadcast_f32x4() {
54896 let src = _mm256_set1_ps(20.);
54897 let a = _mm_set_ps(17., 18., 19., 20.);
54898 let r = _mm256_mask_broadcast_f32x4(src, 0, a);
54899 assert_eq_m256(r, src);
54900 let r = _mm256_mask_broadcast_f32x4(src, 0b11111111, a);
54901 let e = _mm256_set_ps(17., 18., 19., 20., 17., 18., 19., 20.);
54902 assert_eq_m256(r, e);
54903 }
54904
54905 #[simd_test(enable = "avx512f,avx512vl")]
54906 unsafe fn test_mm256_maskz_broadcast_f32x4() {
54907 let a = _mm_set_ps(17., 18., 19., 20.);
54908 let r = _mm256_maskz_broadcast_f32x4(0, a);
54909 assert_eq_m256(r, _mm256_setzero_ps());
54910 let r = _mm256_maskz_broadcast_f32x4(0b11111111, a);
54911 let e = _mm256_set_ps(17., 18., 19., 20., 17., 18., 19., 20.);
54912 assert_eq_m256(r, e);
54913 }
54914
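    // mask_blend picks the element from `b` where the mask bit is set and from `a` where it is
    // clear; bit 0 of the mask controls element 0.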
54915 #[simd_test(enable = "avx512f")]
54916 unsafe fn test_mm512_mask_blend_epi32() {
54917 let a = _mm512_set1_epi32(1);
54918 let b = _mm512_set1_epi32(2);
54919 let r = _mm512_mask_blend_epi32(0b11111111_00000000, a, b);
54920 let e = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
54921 assert_eq_m512i(r, e);
54922 }
54923
54924 #[simd_test(enable = "avx512f,avx512vl")]
54925 unsafe fn test_mm256_mask_blend_epi32() {
54926 let a = _mm256_set1_epi32(1);
54927 let b = _mm256_set1_epi32(2);
54928 let r = _mm256_mask_blend_epi32(0b11111111, a, b);
54929 let e = _mm256_set1_epi32(2);
54930 assert_eq_m256i(r, e);
54931 }
54932
54933 #[simd_test(enable = "avx512f,avx512vl")]
54934 unsafe fn test_mm_mask_blend_epi32() {
54935 let a = _mm_set1_epi32(1);
54936 let b = _mm_set1_epi32(2);
54937 let r = _mm_mask_blend_epi32(0b00001111, a, b);
54938 let e = _mm_set1_epi32(2);
54939 assert_eq_m128i(r, e);
54940 }
54941
54942 #[simd_test(enable = "avx512f")]
54943 unsafe fn test_mm512_mask_blend_ps() {
54944 let a = _mm512_set1_ps(1.);
54945 let b = _mm512_set1_ps(2.);
54946 let r = _mm512_mask_blend_ps(0b11111111_00000000, a, b);
54947 let e = _mm512_set_ps(
54948 2., 2., 2., 2., 2., 2., 2., 2., 1., 1., 1., 1., 1., 1., 1., 1.,
54949 );
54950 assert_eq_m512(r, e);
54951 }
54952
54953 #[simd_test(enable = "avx512f,avx512vl")]
54954 unsafe fn test_mm256_mask_blend_ps() {
54955 let a = _mm256_set1_ps(1.);
54956 let b = _mm256_set1_ps(2.);
54957 let r = _mm256_mask_blend_ps(0b11111111, a, b);
54958 let e = _mm256_set1_ps(2.);
54959 assert_eq_m256(r, e);
54960 }
54961
54962 #[simd_test(enable = "avx512f,avx512vl")]
54963 unsafe fn test_mm_mask_blend_ps() {
54964 let a = _mm_set1_ps(1.);
54965 let b = _mm_set1_ps(2.);
54966 let r = _mm_mask_blend_ps(0b00001111, a, b);
54967 let e = _mm_set1_ps(2.);
54968 assert_eq_m128(r, e);
54969 }
54970
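    // unpackhi interleaves the upper halves of each 128-bit lane of `a` and `b`; unpacklo
    // (further below) interleaves the lower halves.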
54971 #[simd_test(enable = "avx512f")]
54972 unsafe fn test_mm512_unpackhi_epi32() {
54973 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54974 let b = _mm512_set_epi32(
54975 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
54976 );
54977 let r = _mm512_unpackhi_epi32(a, b);
54978 let e = _mm512_set_epi32(17, 1, 18, 2, 21, 5, 22, 6, 25, 9, 26, 10, 29, 13, 30, 14);
54979 assert_eq_m512i(r, e);
54980 }
54981
54982 #[simd_test(enable = "avx512f")]
54983 unsafe fn test_mm512_mask_unpackhi_epi32() {
54984 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54985 let b = _mm512_set_epi32(
54986 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
54987 );
54988 let r = _mm512_mask_unpackhi_epi32(a, 0, a, b);
54989 assert_eq_m512i(r, a);
54990 let r = _mm512_mask_unpackhi_epi32(a, 0b11111111_11111111, a, b);
54991 let e = _mm512_set_epi32(17, 1, 18, 2, 21, 5, 22, 6, 25, 9, 26, 10, 29, 13, 30, 14);
54992 assert_eq_m512i(r, e);
54993 }
54994
54995 #[simd_test(enable = "avx512f")]
54996 unsafe fn test_mm512_maskz_unpackhi_epi32() {
54997 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54998 let b = _mm512_set_epi32(
54999 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
55000 );
55001 let r = _mm512_maskz_unpackhi_epi32(0, a, b);
55002 assert_eq_m512i(r, _mm512_setzero_si512());
55003 let r = _mm512_maskz_unpackhi_epi32(0b00000000_11111111, a, b);
55004 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 25, 9, 26, 10, 29, 13, 30, 14);
55005 assert_eq_m512i(r, e);
55006 }
55007
55008 #[simd_test(enable = "avx512f,avx512vl")]
55009 unsafe fn test_mm256_mask_unpackhi_epi32() {
55010 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
55011 let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
55012 let r = _mm256_mask_unpackhi_epi32(a, 0, a, b);
55013 assert_eq_m256i(r, a);
55014 let r = _mm256_mask_unpackhi_epi32(a, 0b11111111, a, b);
55015 let e = _mm256_set_epi32(17, 1, 18, 2, 21, 5, 22, 6);
55016 assert_eq_m256i(r, e);
55017 }
55018
55019 #[simd_test(enable = "avx512f,avx512vl")]
55020 unsafe fn test_mm256_maskz_unpackhi_epi32() {
55021 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
55022 let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
55023 let r = _mm256_maskz_unpackhi_epi32(0, a, b);
55024 assert_eq_m256i(r, _mm256_setzero_si256());
55025 let r = _mm256_maskz_unpackhi_epi32(0b11111111, a, b);
55026 let e = _mm256_set_epi32(17, 1, 18, 2, 21, 5, 22, 6);
55027 assert_eq_m256i(r, e);
55028 }
55029
55030 #[simd_test(enable = "avx512f,avx512vl")]
55031 unsafe fn test_mm_mask_unpackhi_epi32() {
55032 let a = _mm_set_epi32(1, 2, 3, 4);
55033 let b = _mm_set_epi32(17, 18, 19, 20);
55034 let r = _mm_mask_unpackhi_epi32(a, 0, a, b);
55035 assert_eq_m128i(r, a);
55036 let r = _mm_mask_unpackhi_epi32(a, 0b00001111, a, b);
55037 let e = _mm_set_epi32(17, 1, 18, 2);
55038 assert_eq_m128i(r, e);
55039 }
55040
55041 #[simd_test(enable = "avx512f,avx512vl")]
55042 unsafe fn test_mm_maskz_unpackhi_epi32() {
55043 let a = _mm_set_epi32(1, 2, 3, 4);
55044 let b = _mm_set_epi32(17, 18, 19, 20);
55045 let r = _mm_maskz_unpackhi_epi32(0, a, b);
55046 assert_eq_m128i(r, _mm_setzero_si128());
55047 let r = _mm_maskz_unpackhi_epi32(0b00001111, a, b);
55048 let e = _mm_set_epi32(17, 1, 18, 2);
55049 assert_eq_m128i(r, e);
55050 }
55051
55052 #[simd_test(enable = "avx512f")]
55053 unsafe fn test_mm512_unpackhi_ps() {
55054 let a = _mm512_set_ps(
55055 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
55056 );
55057 let b = _mm512_set_ps(
55058 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
55059 );
55060 let r = _mm512_unpackhi_ps(a, b);
55061 let e = _mm512_set_ps(
55062 17., 1., 18., 2., 21., 5., 22., 6., 25., 9., 26., 10., 29., 13., 30., 14.,
55063 );
55064 assert_eq_m512(r, e);
55065 }
55066
55067 #[simd_test(enable = "avx512f")]
55068 unsafe fn test_mm512_mask_unpackhi_ps() {
55069 let a = _mm512_set_ps(
55070 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
55071 );
55072 let b = _mm512_set_ps(
55073 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
55074 );
55075 let r = _mm512_mask_unpackhi_ps(a, 0, a, b);
55076 assert_eq_m512(r, a);
55077 let r = _mm512_mask_unpackhi_ps(a, 0b11111111_11111111, a, b);
55078 let e = _mm512_set_ps(
55079 17., 1., 18., 2., 21., 5., 22., 6., 25., 9., 26., 10., 29., 13., 30., 14.,
55080 );
55081 assert_eq_m512(r, e);
55082 }
55083
55084 #[simd_test(enable = "avx512f")]
55085 unsafe fn test_mm512_maskz_unpackhi_ps() {
55086 let a = _mm512_set_ps(
55087 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
55088 );
55089 let b = _mm512_set_ps(
55090 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
55091 );
55092 let r = _mm512_maskz_unpackhi_ps(0, a, b);
55093 assert_eq_m512(r, _mm512_setzero_ps());
55094 let r = _mm512_maskz_unpackhi_ps(0b00000000_11111111, a, b);
55095 let e = _mm512_set_ps(
55096 0., 0., 0., 0., 0., 0., 0., 0., 25., 9., 26., 10., 29., 13., 30., 14.,
55097 );
55098 assert_eq_m512(r, e);
55099 }
55100
55101 #[simd_test(enable = "avx512f,avx512vl")]
55102 unsafe fn test_mm256_mask_unpackhi_ps() {
55103 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
55104 let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
55105 let r = _mm256_mask_unpackhi_ps(a, 0, a, b);
55106 assert_eq_m256(r, a);
55107 let r = _mm256_mask_unpackhi_ps(a, 0b11111111, a, b);
55108 let e = _mm256_set_ps(17., 1., 18., 2., 21., 5., 22., 6.);
55109 assert_eq_m256(r, e);
55110 }
55111
55112 #[simd_test(enable = "avx512f,avx512vl")]
55113 unsafe fn test_mm256_maskz_unpackhi_ps() {
55114 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
55115 let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
55116 let r = _mm256_maskz_unpackhi_ps(0, a, b);
55117 assert_eq_m256(r, _mm256_setzero_ps());
55118 let r = _mm256_maskz_unpackhi_ps(0b11111111, a, b);
55119 let e = _mm256_set_ps(17., 1., 18., 2., 21., 5., 22., 6.);
55120 assert_eq_m256(r, e);
55121 }
55122
55123 #[simd_test(enable = "avx512f,avx512vl")]
55124 unsafe fn test_mm_mask_unpackhi_ps() {
55125 let a = _mm_set_ps(1., 2., 3., 4.);
55126 let b = _mm_set_ps(17., 18., 19., 20.);
55127 let r = _mm_mask_unpackhi_ps(a, 0, a, b);
55128 assert_eq_m128(r, a);
55129 let r = _mm_mask_unpackhi_ps(a, 0b00001111, a, b);
55130 let e = _mm_set_ps(17., 1., 18., 2.);
55131 assert_eq_m128(r, e);
55132 }
55133
55134 #[simd_test(enable = "avx512f,avx512vl")]
55135 unsafe fn test_mm_maskz_unpackhi_ps() {
55136 let a = _mm_set_ps(1., 2., 3., 4.);
55137 let b = _mm_set_ps(17., 18., 19., 20.);
55138 let r = _mm_maskz_unpackhi_ps(0, a, b);
55139 assert_eq_m128(r, _mm_setzero_ps());
55140 let r = _mm_maskz_unpackhi_ps(0b00001111, a, b);
55141 let e = _mm_set_ps(17., 1., 18., 2.);
55142 assert_eq_m128(r, e);
55143 }
55144
55145 #[simd_test(enable = "avx512f")]
55146 unsafe fn test_mm512_unpacklo_epi32() {
55147 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
55148 let b = _mm512_set_epi32(
55149 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
55150 );
55151 let r = _mm512_unpacklo_epi32(a, b);
55152 let e = _mm512_set_epi32(19, 3, 20, 4, 23, 7, 24, 8, 27, 11, 28, 12, 31, 15, 32, 16);
55153 assert_eq_m512i(r, e);
55154 }
55155
55156 #[simd_test(enable = "avx512f")]
55157 unsafe fn test_mm512_mask_unpacklo_epi32() {
55158 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
55159 let b = _mm512_set_epi32(
55160 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
55161 );
55162 let r = _mm512_mask_unpacklo_epi32(a, 0, a, b);
55163 assert_eq_m512i(r, a);
55164 let r = _mm512_mask_unpacklo_epi32(a, 0b11111111_11111111, a, b);
55165 let e = _mm512_set_epi32(19, 3, 20, 4, 23, 7, 24, 8, 27, 11, 28, 12, 31, 15, 32, 16);
55166 assert_eq_m512i(r, e);
55167 }
55168
55169 #[simd_test(enable = "avx512f")]
55170 unsafe fn test_mm512_maskz_unpacklo_epi32() {
55171 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
55172 let b = _mm512_set_epi32(
55173 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
55174 );
55175 let r = _mm512_maskz_unpacklo_epi32(0, a, b);
55176 assert_eq_m512i(r, _mm512_setzero_si512());
55177 let r = _mm512_maskz_unpacklo_epi32(0b00000000_11111111, a, b);
55178 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 27, 11, 28, 12, 31, 15, 32, 16);
55179 assert_eq_m512i(r, e);
55180 }
55181
55182 #[simd_test(enable = "avx512f,avx512vl")]
55183 unsafe fn test_mm256_mask_unpacklo_epi32() {
55184 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
55185 let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
55186 let r = _mm256_mask_unpacklo_epi32(a, 0, a, b);
55187 assert_eq_m256i(r, a);
55188 let r = _mm256_mask_unpacklo_epi32(a, 0b11111111, a, b);
55189 let e = _mm256_set_epi32(19, 3, 20, 4, 23, 7, 24, 8);
55190 assert_eq_m256i(r, e);
55191 }
55192
55193 #[simd_test(enable = "avx512f,avx512vl")]
55194 unsafe fn test_mm256_maskz_unpacklo_epi32() {
55195 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
55196 let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
55197 let r = _mm256_maskz_unpacklo_epi32(0, a, b);
55198 assert_eq_m256i(r, _mm256_setzero_si256());
55199 let r = _mm256_maskz_unpacklo_epi32(0b11111111, a, b);
55200 let e = _mm256_set_epi32(19, 3, 20, 4, 23, 7, 24, 8);
55201 assert_eq_m256i(r, e);
55202 }
55203
55204 #[simd_test(enable = "avx512f,avx512vl")]
55205 unsafe fn test_mm_mask_unpacklo_epi32() {
55206 let a = _mm_set_epi32(1, 2, 3, 4);
55207 let b = _mm_set_epi32(17, 18, 19, 20);
55208 let r = _mm_mask_unpacklo_epi32(a, 0, a, b);
55209 assert_eq_m128i(r, a);
55210 let r = _mm_mask_unpacklo_epi32(a, 0b00001111, a, b);
55211 let e = _mm_set_epi32(19, 3, 20, 4);
55212 assert_eq_m128i(r, e);
55213 }
55214
55215 #[simd_test(enable = "avx512f,avx512vl")]
55216 unsafe fn test_mm_maskz_unpacklo_epi32() {
55217 let a = _mm_set_epi32(1, 2, 3, 4);
55218 let b = _mm_set_epi32(17, 18, 19, 20);
55219 let r = _mm_maskz_unpacklo_epi32(0, a, b);
55220 assert_eq_m128i(r, _mm_setzero_si128());
55221 let r = _mm_maskz_unpacklo_epi32(0b00001111, a, b);
55222 let e = _mm_set_epi32(19, 3, 20, 4);
55223 assert_eq_m128i(r, e);
55224 }
55225
55226 #[simd_test(enable = "avx512f")]
55227 unsafe fn test_mm512_unpacklo_ps() {
55228 let a = _mm512_set_ps(
55229 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
55230 );
55231 let b = _mm512_set_ps(
55232 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
55233 );
55234 let r = _mm512_unpacklo_ps(a, b);
55235 let e = _mm512_set_ps(
55236 19., 3., 20., 4., 23., 7., 24., 8., 27., 11., 28., 12., 31., 15., 32., 16.,
55237 );
55238 assert_eq_m512(r, e);
55239 }
55240
55241 #[simd_test(enable = "avx512f")]
55242 unsafe fn test_mm512_mask_unpacklo_ps() {
55243 let a = _mm512_set_ps(
55244 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
55245 );
55246 let b = _mm512_set_ps(
55247 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
55248 );
55249 let r = _mm512_mask_unpacklo_ps(a, 0, a, b);
55250 assert_eq_m512(r, a);
55251 let r = _mm512_mask_unpacklo_ps(a, 0b11111111_11111111, a, b);
55252 let e = _mm512_set_ps(
55253 19., 3., 20., 4., 23., 7., 24., 8., 27., 11., 28., 12., 31., 15., 32., 16.,
55254 );
55255 assert_eq_m512(r, e);
55256 }
55257
55258 #[simd_test(enable = "avx512f")]
55259 unsafe fn test_mm512_maskz_unpacklo_ps() {
55260 let a = _mm512_set_ps(
55261 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
55262 );
55263 let b = _mm512_set_ps(
55264 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
55265 );
55266 let r = _mm512_maskz_unpacklo_ps(0, a, b);
55267 assert_eq_m512(r, _mm512_setzero_ps());
55268 let r = _mm512_maskz_unpacklo_ps(0b00000000_11111111, a, b);
55269 let e = _mm512_set_ps(
55270 0., 0., 0., 0., 0., 0., 0., 0., 27., 11., 28., 12., 31., 15., 32., 16.,
55271 );
55272 assert_eq_m512(r, e);
55273 }
55274
55275 #[simd_test(enable = "avx512f,avx512vl")]
55276 unsafe fn test_mm256_mask_unpacklo_ps() {
55277 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
55278 let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
55279 let r = _mm256_mask_unpacklo_ps(a, 0, a, b);
55280 assert_eq_m256(r, a);
55281 let r = _mm256_mask_unpacklo_ps(a, 0b11111111, a, b);
55282 let e = _mm256_set_ps(19., 3., 20., 4., 23., 7., 24., 8.);
55283 assert_eq_m256(r, e);
55284 }
55285
55286 #[simd_test(enable = "avx512f,avx512vl")]
55287 unsafe fn test_mm256_maskz_unpacklo_ps() {
55288 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
55289 let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
55290 let r = _mm256_maskz_unpacklo_ps(0, a, b);
55291 assert_eq_m256(r, _mm256_setzero_ps());
55292 let r = _mm256_maskz_unpacklo_ps(0b11111111, a, b);
55293 let e = _mm256_set_ps(19., 3., 20., 4., 23., 7., 24., 8.);
55294 assert_eq_m256(r, e);
55295 }
55296
55297 #[simd_test(enable = "avx512f,avx512vl")]
55298 unsafe fn test_mm_mask_unpacklo_ps() {
55299 let a = _mm_set_ps(1., 2., 3., 4.);
55300 let b = _mm_set_ps(17., 18., 19., 20.);
55301 let r = _mm_mask_unpacklo_ps(a, 0, a, b);
55302 assert_eq_m128(r, a);
55303 let r = _mm_mask_unpacklo_ps(a, 0b00001111, a, b);
55304 let e = _mm_set_ps(19., 3., 20., 4.);
55305 assert_eq_m128(r, e);
55306 }
55307
55308 #[simd_test(enable = "avx512f,avx512vl")]
55309 unsafe fn test_mm_maskz_unpacklo_ps() {
55310 let a = _mm_set_ps(1., 2., 3., 4.);
55311 let b = _mm_set_ps(17., 18., 19., 20.);
55312 let r = _mm_maskz_unpacklo_ps(0, a, b);
55313 assert_eq_m128(r, _mm_setzero_ps());
55314 let r = _mm_maskz_unpacklo_ps(0b00001111, a, b);
55315 let e = _mm_set_ps(19., 3., 20., 4.);
55316 assert_eq_m128(r, e);
55317 }
55318
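    // valignd concatenates `a` (high) with `b` (low) and shifts right by the const number of
    // 32-bit elements; per Intel's pseudocode only the low bits of the immediate are used, so
    // a shift of 16 behaves like 0 for the 512-bit form.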
55319 #[simd_test(enable = "avx512f")]
55320 unsafe fn test_mm512_alignr_epi32() {
55321 let a = _mm512_set_epi32(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
55322 let b = _mm512_set_epi32(
55323 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
55324 );
55325 let r = _mm512_alignr_epi32::<0>(a, b);
55326 assert_eq_m512i(r, b);
55327 let r = _mm512_alignr_epi32::<16>(a, b);
55328 assert_eq_m512i(r, b);
55329 let r = _mm512_alignr_epi32::<1>(a, b);
55330 let e = _mm512_set_epi32(
55331 1, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18,
55332 );
55333 assert_eq_m512i(r, e);
55334 }
55335
55336 #[simd_test(enable = "avx512f")]
55337 unsafe fn test_mm512_mask_alignr_epi32() {
55338 let a = _mm512_set_epi32(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
55339 let b = _mm512_set_epi32(
55340 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
55341 );
55342 let r = _mm512_mask_alignr_epi32::<1>(a, 0, a, b);
55343 assert_eq_m512i(r, a);
55344 let r = _mm512_mask_alignr_epi32::<1>(a, 0b11111111_11111111, a, b);
55345 let e = _mm512_set_epi32(
55346 1, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18,
55347 );
55348 assert_eq_m512i(r, e);
55349 }
55350
55351 #[simd_test(enable = "avx512f")]
55352 unsafe fn test_mm512_maskz_alignr_epi32() {
55353 let a = _mm512_set_epi32(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
55354 let b = _mm512_set_epi32(
55355 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
55356 );
55357 let r = _mm512_maskz_alignr_epi32::<1>(0, a, b);
55358 assert_eq_m512i(r, _mm512_setzero_si512());
55359 let r = _mm512_maskz_alignr_epi32::<1>(0b00000000_11111111, a, b);
55360 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 25, 24, 23, 22, 21, 20, 19, 18);
55361 assert_eq_m512i(r, e);
55362 }
55363
55364 #[simd_test(enable = "avx512f,avx512vl")]
55365 unsafe fn test_mm256_alignr_epi32() {
55366 let a = _mm256_set_epi32(8, 7, 6, 5, 4, 3, 2, 1);
55367 let b = _mm256_set_epi32(16, 15, 14, 13, 12, 11, 10, 9);
55368 let r = _mm256_alignr_epi32::<0>(a, b);
55369 assert_eq_m256i(r, b);
55370 let r = _mm256_alignr_epi32::<1>(a, b);
55371 let e = _mm256_set_epi32(1, 16, 15, 14, 13, 12, 11, 10);
55372 assert_eq_m256i(r, e);
55373 }
55374
55375 #[simd_test(enable = "avx512f,avx512vl")]
55376 unsafe fn test_mm256_mask_alignr_epi32() {
55377 let a = _mm256_set_epi32(8, 7, 6, 5, 4, 3, 2, 1);
55378 let b = _mm256_set_epi32(16, 15, 14, 13, 12, 11, 10, 9);
55379 let r = _mm256_mask_alignr_epi32::<1>(a, 0, a, b);
55380 assert_eq_m256i(r, a);
55381 let r = _mm256_mask_alignr_epi32::<1>(a, 0b11111111, a, b);
55382 let e = _mm256_set_epi32(1, 16, 15, 14, 13, 12, 11, 10);
55383 assert_eq_m256i(r, e);
55384 }
55385
55386 #[simd_test(enable = "avx512f,avx512vl")]
55387 unsafe fn test_mm256_maskz_alignr_epi32() {
55388 let a = _mm256_set_epi32(8, 7, 6, 5, 4, 3, 2, 1);
55389 let b = _mm256_set_epi32(16, 15, 14, 13, 12, 11, 10, 9);
55390 let r = _mm256_maskz_alignr_epi32::<1>(0, a, b);
55391 assert_eq_m256i(r, _mm256_setzero_si256());
55392 let r = _mm256_maskz_alignr_epi32::<1>(0b11111111, a, b);
55393 let e = _mm256_set_epi32(1, 16, 15, 14, 13, 12, 11, 10);
55394 assert_eq_m256i(r, e);
55395 }
55396
55397 #[simd_test(enable = "avx512f,avx512vl")]
55398 unsafe fn test_mm_alignr_epi32() {
55399 let a = _mm_set_epi32(4, 3, 2, 1);
55400 let b = _mm_set_epi32(8, 7, 6, 5);
55401 let r = _mm_alignr_epi32::<0>(a, b);
55402 assert_eq_m128i(r, b);
55403 let r = _mm_alignr_epi32::<1>(a, b);
55404 let e = _mm_set_epi32(1, 8, 7, 6);
55405 assert_eq_m128i(r, e);
55406 }
55407
55408 #[simd_test(enable = "avx512f,avx512vl")]
55409 unsafe fn test_mm_mask_alignr_epi32() {
55410 let a = _mm_set_epi32(4, 3, 2, 1);
55411 let b = _mm_set_epi32(8, 7, 6, 5);
55412 let r = _mm_mask_alignr_epi32::<1>(a, 0, a, b);
55413 assert_eq_m128i(r, a);
55414 let r = _mm_mask_alignr_epi32::<1>(a, 0b00001111, a, b);
55415 let e = _mm_set_epi32(1, 8, 7, 6);
55416 assert_eq_m128i(r, e);
55417 }
55418
55419 #[simd_test(enable = "avx512f,avx512vl")]
55420 unsafe fn test_mm_maskz_alignr_epi32() {
55421 let a = _mm_set_epi32(4, 3, 2, 1);
55422 let b = _mm_set_epi32(8, 7, 6, 5);
55423 let r = _mm_maskz_alignr_epi32::<1>(0, a, b);
55424 assert_eq_m128i(r, _mm_setzero_si128());
55425 let r = _mm_maskz_alignr_epi32::<1>(0b00001111, a, b);
55426 let e = _mm_set_epi32(1, 8, 7, 6);
55427 assert_eq_m128i(r, e);
55428 }
55429
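    // Reminder for the bitwise tests below: `_mm512_set_epi32` lists elements from
    // lane 15 down to lane 0, so the first literal is the highest lane and the last
    // literal is lane 0 (the lane selected by mask bit 0).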
55430 #[simd_test(enable = "avx512f")]
55431 unsafe fn test_mm512_and_epi32() {
55432 #[rustfmt::skip]
55433 let a = _mm512_set_epi32(
55434 1 << 1 | 1 << 2, 0, 0, 0,
55435 0, 0, 0, 0,
55436 0, 0, 0, 0,
55437 0, 0, 0, 1 << 1 | 1 << 3,
55438 );
55439 #[rustfmt::skip]
55440 let b = _mm512_set_epi32(
55441 1 << 1, 0, 0, 0,
55442 0, 0, 0, 0,
55443 0, 0, 0, 0,
55444 0, 0, 0, 1 << 3 | 1 << 4,
55445 );
55446 let r = _mm512_and_epi32(a, b);
55447 let e = _mm512_set_epi32(1 << 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3);
55448 assert_eq_m512i(r, e);
55449 }
55450
55451 #[simd_test(enable = "avx512f")]
55452 unsafe fn test_mm512_mask_and_epi32() {
55453 #[rustfmt::skip]
55454 let a = _mm512_set_epi32(
55455 1 << 1 | 1 << 2, 0, 0, 0,
55456 0, 0, 0, 0,
55457 0, 0, 0, 0,
55458 0, 0, 0, 1 << 1 | 1 << 3,
55459 );
55460 #[rustfmt::skip]
55461 let b = _mm512_set_epi32(
55462 1 << 1, 0, 0, 0,
55463 0, 0, 0, 0,
55464 0, 0, 0, 0,
55465 0, 0, 0, 1 << 3 | 1 << 4,
55466 );
55467 let r = _mm512_mask_and_epi32(a, 0, a, b);
55468 assert_eq_m512i(r, a);
55469 let r = _mm512_mask_and_epi32(a, 0b01111111_11111111, a, b);
55470 #[rustfmt::skip]
55471 let e = _mm512_set_epi32(
55472 1 << 1 | 1 << 2, 0, 0, 0,
55473 0, 0, 0, 0,
55474 0, 0, 0, 0,
55475 0, 0, 0, 1 << 3,
55476 );
55477 assert_eq_m512i(r, e);
55478 }
55479
55480 #[simd_test(enable = "avx512f")]
55481 unsafe fn test_mm512_maskz_and_epi32() {
55482 #[rustfmt::skip]
55483 let a = _mm512_set_epi32(
55484 1 << 1 | 1 << 2, 0, 0, 0,
55485 0, 0, 0, 0,
55486 0, 0, 0, 0,
55487 0, 0, 0, 1 << 1 | 1 << 3,
55488 );
55489 #[rustfmt::skip]
55490 let b = _mm512_set_epi32(
55491 1 << 1, 0, 0, 0,
55492 0, 0, 0, 0,
55493 0, 0, 0, 0,
55494 0, 0, 0, 1 << 3 | 1 << 4,
55495 );
55496 let r = _mm512_maskz_and_epi32(0, a, b);
55497 assert_eq_m512i(r, _mm512_setzero_si512());
55498 let r = _mm512_maskz_and_epi32(0b00000000_11111111, a, b);
55499 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3);
55500 assert_eq_m512i(r, e);
55501 }
55502
55503 #[simd_test(enable = "avx512f,avx512vl")]
55504 unsafe fn test_mm256_mask_and_epi32() {
55505 let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55506 let b = _mm256_set1_epi32(1 << 1);
55507 let r = _mm256_mask_and_epi32(a, 0, a, b);
55508 assert_eq_m256i(r, a);
55509 let r = _mm256_mask_and_epi32(a, 0b11111111, a, b);
55510 let e = _mm256_set1_epi32(1 << 1);
55511 assert_eq_m256i(r, e);
55512 }
55513
55514 #[simd_test(enable = "avx512f,avx512vl")]
55515 unsafe fn test_mm256_maskz_and_epi32() {
55516 let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55517 let b = _mm256_set1_epi32(1 << 1);
55518 let r = _mm256_maskz_and_epi32(0, a, b);
55519 assert_eq_m256i(r, _mm256_setzero_si256());
55520 let r = _mm256_maskz_and_epi32(0b11111111, a, b);
55521 let e = _mm256_set1_epi32(1 << 1);
55522 assert_eq_m256i(r, e);
55523 }
55524
55525 #[simd_test(enable = "avx512f,avx512vl")]
55526 unsafe fn test_mm_mask_and_epi32() {
55527 let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55528 let b = _mm_set1_epi32(1 << 1);
55529 let r = _mm_mask_and_epi32(a, 0, a, b);
55530 assert_eq_m128i(r, a);
55531 let r = _mm_mask_and_epi32(a, 0b00001111, a, b);
55532 let e = _mm_set1_epi32(1 << 1);
55533 assert_eq_m128i(r, e);
55534 }
55535
55536 #[simd_test(enable = "avx512f,avx512vl")]
55537 unsafe fn test_mm_maskz_and_epi32() {
55538 let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55539 let b = _mm_set1_epi32(1 << 1);
55540 let r = _mm_maskz_and_epi32(0, a, b);
55541 assert_eq_m128i(r, _mm_setzero_si128());
55542 let r = _mm_maskz_and_epi32(0b00001111, a, b);
55543 let e = _mm_set1_epi32(1 << 1);
55544 assert_eq_m128i(r, e);
55545 }
55546
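    // `_mm512_and_si512` is the untyped whole-register AND; it produces the same
    // result as `_mm512_and_epi32`, so this test mirrors the epi32 case above.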
55547 #[simd_test(enable = "avx512f")]
55548 unsafe fn test_mm512_and_si512() {
55549 #[rustfmt::skip]
55550 let a = _mm512_set_epi32(
55551 1 << 1 | 1 << 2, 0, 0, 0,
55552 0, 0, 0, 0,
55553 0, 0, 0, 0,
55554 0, 0, 0, 1 << 1 | 1 << 3,
55555 );
55556 #[rustfmt::skip]
55557 let b = _mm512_set_epi32(
55558 1 << 1, 0, 0, 0,
55559 0, 0, 0, 0,
55560 0, 0, 0, 0,
55561 0, 0, 0, 1 << 3 | 1 << 4,
55562 );
55563         let r = _mm512_and_si512(a, b);
55564 let e = _mm512_set_epi32(1 << 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3);
55565 assert_eq_m512i(r, e);
55566 }
55567
55568 #[simd_test(enable = "avx512f")]
55569 unsafe fn test_mm512_or_epi32() {
55570 #[rustfmt::skip]
55571 let a = _mm512_set_epi32(
55572 1 << 1 | 1 << 2, 0, 0, 0,
55573 0, 0, 0, 0,
55574 0, 0, 0, 0,
55575 0, 0, 0, 1 << 1 | 1 << 3,
55576 );
55577 #[rustfmt::skip]
55578 let b = _mm512_set_epi32(
55579 1 << 1, 0, 0, 0,
55580 0, 0, 0, 0,
55581 0, 0, 0, 0,
55582 0, 0, 0, 1 << 3 | 1 << 4,
55583 );
55584 let r = _mm512_or_epi32(a, b);
55585 #[rustfmt::skip]
55586 let e = _mm512_set_epi32(
55587 1 << 1 | 1 << 2, 0, 0, 0,
55588 0, 0, 0, 0,
55589 0, 0, 0, 0,
55590 0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
55591 );
55592 assert_eq_m512i(r, e);
55593 }
55594
55595 #[simd_test(enable = "avx512f")]
55596 unsafe fn test_mm512_mask_or_epi32() {
55597 #[rustfmt::skip]
55598 let a = _mm512_set_epi32(
55599 1 << 1 | 1 << 2, 0, 0, 0,
55600 0, 0, 0, 0,
55601 0, 0, 0, 0,
55602 0, 0, 0, 1 << 1 | 1 << 3,
55603 );
55604 #[rustfmt::skip]
55605 let b = _mm512_set_epi32(
55606 1 << 1, 0, 0, 0,
55607 0, 0, 0, 0,
55608 0, 0, 0, 0,
55609 0, 0, 0, 1 << 3 | 1 << 4,
55610 );
55611 let r = _mm512_mask_or_epi32(a, 0, a, b);
55612 assert_eq_m512i(r, a);
55613 let r = _mm512_mask_or_epi32(a, 0b11111111_11111111, a, b);
55614 #[rustfmt::skip]
55615 let e = _mm512_set_epi32(
55616 1 << 1 | 1 << 2, 0, 0, 0,
55617 0, 0, 0, 0,
55618 0, 0, 0, 0,
55619 0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
55620 );
55621 assert_eq_m512i(r, e);
55622 }
55623
55624 #[simd_test(enable = "avx512f")]
55625 unsafe fn test_mm512_maskz_or_epi32() {
55626 #[rustfmt::skip]
55627 let a = _mm512_set_epi32(
55628 1 << 1 | 1 << 2, 0, 0, 0,
55629 0, 0, 0, 0,
55630 0, 0, 0, 0,
55631 0, 0, 0, 1 << 1 | 1 << 3,
55632 );
55633 #[rustfmt::skip]
55634 let b = _mm512_set_epi32(
55635 1 << 1, 0, 0, 0,
55636 0, 0, 0, 0,
55637 0, 0, 0, 0,
55638 0, 0, 0, 1 << 3 | 1 << 4,
55639 );
55640 let r = _mm512_maskz_or_epi32(0, a, b);
55641 assert_eq_m512i(r, _mm512_setzero_si512());
55642 let r = _mm512_maskz_or_epi32(0b00000000_11111111, a, b);
55643 #[rustfmt::skip]
55644 let e = _mm512_set_epi32(
55645 0, 0, 0, 0,
55646 0, 0, 0, 0,
55647 0, 0, 0, 0,
55648 0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
55649 );
55650 assert_eq_m512i(r, e);
55651 }
55652
55653 #[simd_test(enable = "avx512f,avx512vl")]
55654 unsafe fn test_mm256_or_epi32() {
55655 let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55656 let b = _mm256_set1_epi32(1 << 1);
55657 let r = _mm256_or_epi32(a, b);
55658 let e = _mm256_set1_epi32(1 << 1 | 1 << 2);
55659 assert_eq_m256i(r, e);
55660 }
55661
55662 #[simd_test(enable = "avx512f,avx512vl")]
55663 unsafe fn test_mm256_mask_or_epi32() {
55664 let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55665 let b = _mm256_set1_epi32(1 << 1);
55666 let r = _mm256_mask_or_epi32(a, 0, a, b);
55667 assert_eq_m256i(r, a);
55668 let r = _mm256_mask_or_epi32(a, 0b11111111, a, b);
55669 let e = _mm256_set1_epi32(1 << 1 | 1 << 2);
55670 assert_eq_m256i(r, e);
55671 }
55672
55673 #[simd_test(enable = "avx512f,avx512vl")]
55674 unsafe fn test_mm256_maskz_or_epi32() {
55675 let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55676 let b = _mm256_set1_epi32(1 << 1);
55677 let r = _mm256_maskz_or_epi32(0, a, b);
55678 assert_eq_m256i(r, _mm256_setzero_si256());
55679 let r = _mm256_maskz_or_epi32(0b11111111, a, b);
55680 let e = _mm256_set1_epi32(1 << 1 | 1 << 2);
55681 assert_eq_m256i(r, e);
55682 }
55683
55684 #[simd_test(enable = "avx512f,avx512vl")]
55685 unsafe fn test_mm_or_epi32() {
55686 let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55687 let b = _mm_set1_epi32(1 << 1);
55688 let r = _mm_or_epi32(a, b);
55689 let e = _mm_set1_epi32(1 << 1 | 1 << 2);
55690 assert_eq_m128i(r, e);
55691 }
55692
55693 #[simd_test(enable = "avx512f,avx512vl")]
55694 unsafe fn test_mm_mask_or_epi32() {
55695 let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55696 let b = _mm_set1_epi32(1 << 1);
55697 let r = _mm_mask_or_epi32(a, 0, a, b);
55698 assert_eq_m128i(r, a);
55699 let r = _mm_mask_or_epi32(a, 0b00001111, a, b);
55700 let e = _mm_set1_epi32(1 << 1 | 1 << 2);
55701 assert_eq_m128i(r, e);
55702 }
55703
55704 #[simd_test(enable = "avx512f,avx512vl")]
55705 unsafe fn test_mm_maskz_or_epi32() {
55706 let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55707 let b = _mm_set1_epi32(1 << 1);
55708 let r = _mm_maskz_or_epi32(0, a, b);
55709 assert_eq_m128i(r, _mm_setzero_si128());
55710 let r = _mm_maskz_or_epi32(0b00001111, a, b);
55711 let e = _mm_set1_epi32(1 << 1 | 1 << 2);
55712 assert_eq_m128i(r, e);
55713 }
55714
55715 #[simd_test(enable = "avx512f")]
55716 unsafe fn test_mm512_or_si512() {
55717 #[rustfmt::skip]
55718 let a = _mm512_set_epi32(
55719 1 << 1 | 1 << 2, 0, 0, 0,
55720 0, 0, 0, 0,
55721 0, 0, 0, 0,
55722 0, 0, 0, 1 << 1 | 1 << 3,
55723 );
55724 #[rustfmt::skip]
55725 let b = _mm512_set_epi32(
55726 1 << 1, 0, 0, 0,
55727 0, 0, 0, 0,
55728 0, 0, 0, 0,
55729 0, 0, 0, 1 << 3 | 1 << 4,
55730 );
55731         let r = _mm512_or_si512(a, b);
55732 #[rustfmt::skip]
55733 let e = _mm512_set_epi32(
55734 1 << 1 | 1 << 2, 0, 0, 0,
55735 0, 0, 0, 0,
55736 0, 0, 0, 0,
55737 0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
55738 );
55739 assert_eq_m512i(r, e);
55740 }
55741
55742 #[simd_test(enable = "avx512f")]
55743 unsafe fn test_mm512_xor_epi32() {
55744 #[rustfmt::skip]
55745 let a = _mm512_set_epi32(
55746 1 << 1 | 1 << 2, 0, 0, 0,
55747 0, 0, 0, 0,
55748 0, 0, 0, 0,
55749 0, 0, 0, 1 << 1 | 1 << 3,
55750 );
55751 #[rustfmt::skip]
55752 let b = _mm512_set_epi32(
55753 1 << 1, 0, 0, 0,
55754 0, 0, 0, 0,
55755 0, 0, 0, 0,
55756 0, 0, 0, 1 << 3 | 1 << 4,
55757 );
55758 let r = _mm512_xor_epi32(a, b);
55759 #[rustfmt::skip]
55760 let e = _mm512_set_epi32(
55761 1 << 2, 0, 0, 0,
55762 0, 0, 0, 0,
55763 0, 0, 0, 0,
55764 0, 0, 0, 1 << 1 | 1 << 4,
55765 );
55766 assert_eq_m512i(r, e);
55767 }
55768
55769 #[simd_test(enable = "avx512f")]
55770 unsafe fn test_mm512_mask_xor_epi32() {
55771 #[rustfmt::skip]
55772 let a = _mm512_set_epi32(
55773 1 << 1 | 1 << 2, 0, 0, 0,
55774 0, 0, 0, 0,
55775 0, 0, 0, 0,
55776 0, 0, 0, 1 << 1 | 1 << 3,
55777 );
55778 #[rustfmt::skip]
55779 let b = _mm512_set_epi32(
55780 1 << 1, 0, 0, 0,
55781 0, 0, 0, 0,
55782 0, 0, 0, 0,
55783 0, 0, 0, 1 << 3 | 1 << 4,
55784 );
55785 let r = _mm512_mask_xor_epi32(a, 0, a, b);
55786 assert_eq_m512i(r, a);
55787 let r = _mm512_mask_xor_epi32(a, 0b01111111_11111111, a, b);
55788 #[rustfmt::skip]
55789 let e = _mm512_set_epi32(
55790 1 << 1 | 1 << 2, 0, 0, 0,
55791 0, 0, 0, 0,
55792 0, 0, 0, 0,
55793 0, 0, 0, 1 << 1 | 1 << 4,
55794 );
55795 assert_eq_m512i(r, e);
55796 }
55797
55798 #[simd_test(enable = "avx512f")]
55799 unsafe fn test_mm512_maskz_xor_epi32() {
55800 #[rustfmt::skip]
55801 let a = _mm512_set_epi32(
55802 1 << 1 | 1 << 2, 0, 0, 0,
55803 0, 0, 0, 0,
55804 0, 0, 0, 0,
55805 0, 0, 0, 1 << 1 | 1 << 3,
55806 );
55807 #[rustfmt::skip]
55808 let b = _mm512_set_epi32(
55809 1 << 1, 0, 0, 0,
55810 0, 0, 0, 0,
55811 0, 0, 0, 0,
55812 0, 0, 0, 1 << 3 | 1 << 4,
55813 );
55814 let r = _mm512_maskz_xor_epi32(0, a, b);
55815 assert_eq_m512i(r, _mm512_setzero_si512());
55816 let r = _mm512_maskz_xor_epi32(0b00000000_11111111, a, b);
55817 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 4);
55818 assert_eq_m512i(r, e);
55819 }
55820
55821 #[simd_test(enable = "avx512f,avx512vl")]
55822 unsafe fn test_mm256_xor_epi32() {
55823 let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55824 let b = _mm256_set1_epi32(1 << 1);
55825 let r = _mm256_xor_epi32(a, b);
55826 let e = _mm256_set1_epi32(1 << 2);
55827 assert_eq_m256i(r, e);
55828 }
55829
55830 #[simd_test(enable = "avx512f,avx512vl")]
55831 unsafe fn test_mm256_mask_xor_epi32() {
55832 let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55833 let b = _mm256_set1_epi32(1 << 1);
55834 let r = _mm256_mask_xor_epi32(a, 0, a, b);
55835 assert_eq_m256i(r, a);
55836 let r = _mm256_mask_xor_epi32(a, 0b11111111, a, b);
55837 let e = _mm256_set1_epi32(1 << 2);
55838 assert_eq_m256i(r, e);
55839 }
55840
55841 #[simd_test(enable = "avx512f,avx512vl")]
55842 unsafe fn test_mm256_maskz_xor_epi32() {
55843 let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55844 let b = _mm256_set1_epi32(1 << 1);
55845 let r = _mm256_maskz_xor_epi32(0, a, b);
55846 assert_eq_m256i(r, _mm256_setzero_si256());
55847 let r = _mm256_maskz_xor_epi32(0b11111111, a, b);
55848 let e = _mm256_set1_epi32(1 << 2);
55849 assert_eq_m256i(r, e);
55850 }
55851
55852 #[simd_test(enable = "avx512f,avx512vl")]
55853 unsafe fn test_mm_xor_epi32() {
55854 let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55855 let b = _mm_set1_epi32(1 << 1);
55856 let r = _mm_xor_epi32(a, b);
55857 let e = _mm_set1_epi32(1 << 2);
55858 assert_eq_m128i(r, e);
55859 }
55860
55861 #[simd_test(enable = "avx512f,avx512vl")]
55862 unsafe fn test_mm_mask_xor_epi32() {
55863 let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55864 let b = _mm_set1_epi32(1 << 1);
55865 let r = _mm_mask_xor_epi32(a, 0, a, b);
55866 assert_eq_m128i(r, a);
55867 let r = _mm_mask_xor_epi32(a, 0b00001111, a, b);
55868 let e = _mm_set1_epi32(1 << 2);
55869 assert_eq_m128i(r, e);
55870 }
55871
55872 #[simd_test(enable = "avx512f,avx512vl")]
55873 unsafe fn test_mm_maskz_xor_epi32() {
55874 let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55875 let b = _mm_set1_epi32(1 << 1);
55876 let r = _mm_maskz_xor_epi32(0, a, b);
55877 assert_eq_m128i(r, _mm_setzero_si128());
55878 let r = _mm_maskz_xor_epi32(0b00001111, a, b);
55879 let e = _mm_set1_epi32(1 << 2);
55880 assert_eq_m128i(r, e);
55881 }
55882
55883 #[simd_test(enable = "avx512f")]
55884 unsafe fn test_mm512_xor_si512() {
55885 #[rustfmt::skip]
55886 let a = _mm512_set_epi32(
55887 1 << 1 | 1 << 2, 0, 0, 0,
55888 0, 0, 0, 0,
55889 0, 0, 0, 0,
55890 0, 0, 0, 1 << 1 | 1 << 3,
55891 );
55892 #[rustfmt::skip]
55893 let b = _mm512_set_epi32(
55894 1 << 1, 0, 0, 0,
55895 0, 0, 0, 0,
55896 0, 0, 0, 0,
55897 0, 0, 0, 1 << 3 | 1 << 4,
55898 );
55899         let r = _mm512_xor_si512(a, b);
55900 #[rustfmt::skip]
55901 let e = _mm512_set_epi32(
55902 1 << 2, 0, 0, 0,
55903 0, 0, 0, 0,
55904 0, 0, 0, 0,
55905 0, 0, 0, 1 << 1 | 1 << 4,
55906 );
55907 assert_eq_m512i(r, e);
55908 }
55909
55910 #[simd_test(enable = "avx512f")]
55911 unsafe fn test_mm512_andnot_epi32() {
55912 let a = _mm512_set1_epi32(0);
55913 let b = _mm512_set1_epi32(1 << 3 | 1 << 4);
55914 let r = _mm512_andnot_epi32(a, b);
55915 let e = _mm512_set1_epi32(1 << 3 | 1 << 4);
55916 assert_eq_m512i(r, e);
55917 }
55918
55919 #[simd_test(enable = "avx512f")]
55920 unsafe fn test_mm512_mask_andnot_epi32() {
55921 let a = _mm512_set1_epi32(1 << 1 | 1 << 2);
55922 let b = _mm512_set1_epi32(1 << 3 | 1 << 4);
55923 let r = _mm512_mask_andnot_epi32(a, 0, a, b);
55924 assert_eq_m512i(r, a);
55925 let r = _mm512_mask_andnot_epi32(a, 0b11111111_11111111, a, b);
55926 let e = _mm512_set1_epi32(1 << 3 | 1 << 4);
55927 assert_eq_m512i(r, e);
55928 }
55929
55930 #[simd_test(enable = "avx512f")]
55931 unsafe fn test_mm512_maskz_andnot_epi32() {
55932 let a = _mm512_set1_epi32(1 << 1 | 1 << 2);
55933 let b = _mm512_set1_epi32(1 << 3 | 1 << 4);
55934 let r = _mm512_maskz_andnot_epi32(0, a, b);
55935 assert_eq_m512i(r, _mm512_setzero_si512());
55936 let r = _mm512_maskz_andnot_epi32(0b00000000_11111111, a, b);
55937 #[rustfmt::skip]
55938 let e = _mm512_set_epi32(
55939 0, 0, 0, 0,
55940 0, 0, 0, 0,
55941 1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4,
55942 1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4,
55943 );
55944 assert_eq_m512i(r, e);
55945 }
55946
55947 #[simd_test(enable = "avx512f,avx512vl")]
55948 unsafe fn test_mm256_mask_andnot_epi32() {
55949 let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55950 let b = _mm256_set1_epi32(1 << 3 | 1 << 4);
55951 let r = _mm256_mask_andnot_epi32(a, 0, a, b);
55952 assert_eq_m256i(r, a);
55953 let r = _mm256_mask_andnot_epi32(a, 0b11111111, a, b);
55954 let e = _mm256_set1_epi32(1 << 3 | 1 << 4);
55955 assert_eq_m256i(r, e);
55956 }
55957
55958 #[simd_test(enable = "avx512f,avx512vl")]
55959 unsafe fn test_mm256_maskz_andnot_epi32() {
55960 let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55961 let b = _mm256_set1_epi32(1 << 3 | 1 << 4);
55962 let r = _mm256_maskz_andnot_epi32(0, a, b);
55963 assert_eq_m256i(r, _mm256_setzero_si256());
55964 let r = _mm256_maskz_andnot_epi32(0b11111111, a, b);
55965 let e = _mm256_set1_epi32(1 << 3 | 1 << 4);
55966 assert_eq_m256i(r, e);
55967 }
55968
55969 #[simd_test(enable = "avx512f,avx512vl")]
55970 unsafe fn test_mm_mask_andnot_epi32() {
55971 let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55972 let b = _mm_set1_epi32(1 << 3 | 1 << 4);
55973 let r = _mm_mask_andnot_epi32(a, 0, a, b);
55974 assert_eq_m128i(r, a);
55975 let r = _mm_mask_andnot_epi32(a, 0b00001111, a, b);
55976 let e = _mm_set1_epi32(1 << 3 | 1 << 4);
55977 assert_eq_m128i(r, e);
55978 }
55979
55980 #[simd_test(enable = "avx512f,avx512vl")]
55981 unsafe fn test_mm_maskz_andnot_epi32() {
55982 let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55983 let b = _mm_set1_epi32(1 << 3 | 1 << 4);
55984 let r = _mm_maskz_andnot_epi32(0, a, b);
55985 assert_eq_m128i(r, _mm_setzero_si128());
55986 let r = _mm_maskz_andnot_epi32(0b00001111, a, b);
55987 let e = _mm_set1_epi32(1 << 3 | 1 << 4);
55988 assert_eq_m128i(r, e);
55989 }
55990
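    // `__mmask16` is represented as a plain `u16`, so the mask/integer conversion
    // intrinsics below are bit-for-bit copies.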
55991 #[simd_test(enable = "avx512f")]
55992 unsafe fn test_cvtmask16_u32() {
55993 let a: __mmask16 = 0b11001100_00110011;
55994 let r = _cvtmask16_u32(a);
55995 let e: u32 = 0b11001100_00110011;
55996 assert_eq!(r, e);
55997 }
55998
55999 #[simd_test(enable = "avx512f")]
56000 unsafe fn test_cvtu32_mask16() {
56001 let a: u32 = 0b11001100_00110011;
56002 let r = _cvtu32_mask16(a);
56003 let e: __mmask16 = 0b11001100_00110011;
56004 assert_eq!(r, e);
56005 }
56006
56007 #[simd_test(enable = "avx512f")]
56008 unsafe fn test_mm512_kand() {
56009 let a: u16 = 0b11001100_00110011;
56010 let b: u16 = 0b11001100_00110011;
56011 let r = _mm512_kand(a, b);
56012 let e: u16 = 0b11001100_00110011;
56013 assert_eq!(r, e);
56014 }
56015
56016 #[simd_test(enable = "avx512f")]
56017 unsafe fn test_kand_mask16() {
56018 let a: u16 = 0b11001100_00110011;
56019 let b: u16 = 0b11001100_00110011;
56020 let r = _kand_mask16(a, b);
56021 let e: u16 = 0b11001100_00110011;
56022 assert_eq!(r, e);
56023 }
56024
56025 #[simd_test(enable = "avx512f")]
56026 unsafe fn test_mm512_kor() {
56027 let a: u16 = 0b11001100_00110011;
56028 let b: u16 = 0b00101110_00001011;
56029 let r = _mm512_kor(a, b);
56030 let e: u16 = 0b11101110_00111011;
56031 assert_eq!(r, e);
56032 }
56033
56034 #[simd_test(enable = "avx512f")]
56035 unsafe fn test_kor_mask16() {
56036 let a: u16 = 0b11001100_00110011;
56037 let b: u16 = 0b00101110_00001011;
56038 let r = _kor_mask16(a, b);
56039 let e: u16 = 0b11101110_00111011;
56040 assert_eq!(r, e);
56041 }
56042
56043 #[simd_test(enable = "avx512f")]
56044 unsafe fn test_mm512_kxor() {
56045 let a: u16 = 0b11001100_00110011;
56046 let b: u16 = 0b00101110_00001011;
56047 let r = _mm512_kxor(a, b);
56048 let e: u16 = 0b11100010_00111000;
56049 assert_eq!(r, e);
56050 }
56051
56052 #[simd_test(enable = "avx512f")]
56053 unsafe fn test_kxor_mask16() {
56054 let a: u16 = 0b11001100_00110011;
56055 let b: u16 = 0b00101110_00001011;
56056 let r = _kxor_mask16(a, b);
56057 let e: u16 = 0b11100010_00111000;
56058 assert_eq!(r, e);
56059 }
56060
56061 #[simd_test(enable = "avx512f")]
56062 unsafe fn test_mm512_knot() {
56063 let a: u16 = 0b11001100_00110011;
56064 let r = _mm512_knot(a);
56065 let e: u16 = 0b00110011_11001100;
56066 assert_eq!(r, e);
56067 }
56068
56069 #[simd_test(enable = "avx512f")]
56070 unsafe fn test_knot_mask16() {
56071 let a: u16 = 0b11001100_00110011;
56072 let r = _knot_mask16(a);
56073 let e: u16 = 0b00110011_11001100;
56074 assert_eq!(r, e);
56075 }
56076
56077 #[simd_test(enable = "avx512f")]
56078 unsafe fn test_mm512_kandn() {
56079 let a: u16 = 0b11001100_00110011;
56080 let b: u16 = 0b00101110_00001011;
56081 let r = _mm512_kandn(a, b);
56082 let e: u16 = 0b00100010_00001000;
56083 assert_eq!(r, e);
56084 }
56085
56086 #[simd_test(enable = "avx512f")]
56087 unsafe fn test_kandn_mask16() {
56088 let a: u16 = 0b11001100_00110011;
56089 let b: u16 = 0b00101110_00001011;
56090 let r = _kandn_mask16(a, b);
56091 let e: u16 = 0b00100010_00001000;
56092 assert_eq!(r, e);
56093 }
56094
56095 #[simd_test(enable = "avx512f")]
56096 unsafe fn test_mm512_kxnor() {
56097 let a: u16 = 0b11001100_00110011;
56098 let b: u16 = 0b00101110_00001011;
56099 let r = _mm512_kxnor(a, b);
56100 let e: u16 = 0b00011101_11000111;
56101 assert_eq!(r, e);
56102 }
56103
56104 #[simd_test(enable = "avx512f")]
56105 unsafe fn test_kxnor_mask16() {
56106 let a: u16 = 0b11001100_00110011;
56107 let b: u16 = 0b00101110_00001011;
56108 let r = _kxnor_mask16(a, b);
56109 let e: u16 = 0b00011101_11000111;
56110 assert_eq!(r, e);
56111 }
56112
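    // `_kortest*` OR the two masks: the "z" result is 1 only if the OR is all
    // zeros, and the "c"/all_ones result is 1 only if the OR is all ones. The
    // operands below are bitwise complements, so their OR is 0xFFFF.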
56113 #[simd_test(enable = "avx512dq")]
56114 unsafe fn test_kortest_mask16_u8() {
56115 let a: __mmask16 = 0b0110100101101001;
56116 let b: __mmask16 = 0b1011011010110110;
56117 let mut all_ones: u8 = 0;
56118 let r = _kortest_mask16_u8(a, b, &mut all_ones);
56119 assert_eq!(r, 0);
56120 assert_eq!(all_ones, 1);
56121 }
56122
56123 #[simd_test(enable = "avx512dq")]
56124 unsafe fn test_kortestc_mask16_u8() {
56125 let a: __mmask16 = 0b0110100101101001;
56126 let b: __mmask16 = 0b1011011010110110;
56127 let r = _kortestc_mask16_u8(a, b);
56128 assert_eq!(r, 1);
56129 }
56130
56131 #[simd_test(enable = "avx512dq")]
56132 unsafe fn test_kortestz_mask16_u8() {
56133 let a: __mmask16 = 0b0110100101101001;
56134 let b: __mmask16 = 0b1011011010110110;
56135 let r = _kortestz_mask16_u8(a, b);
56136 assert_eq!(r, 0);
56137 }
56138
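    // `_kshiftli_mask16`/`_kshiftri_mask16` shift the 16-bit mask by the const
    // generic count, filling vacated bits with zeros.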
56139 #[simd_test(enable = "avx512dq")]
56140 unsafe fn test_kshiftli_mask16() {
56141 let a: __mmask16 = 0b1001011011000011;
56142 let r = _kshiftli_mask16::<3>(a);
56143 let e: __mmask16 = 0b1011011000011000;
56144 assert_eq!(r, e);
56145 }
56146
56147 #[simd_test(enable = "avx512dq")]
56148 unsafe fn test_kshiftri_mask16() {
56149 let a: __mmask16 = 0b0110100100111100;
56150 let r = _kshiftri_mask16::<3>(a);
56151 let e: __mmask16 = 0b0000110100100111;
56152 assert_eq!(r, e);
56153 }
56154
56155 #[simd_test(enable = "avx512f")]
56156 unsafe fn test_load_mask16() {
56157 let a: __mmask16 = 0b1001011011000011;
56158 let r = _load_mask16(&a);
56159 let e: __mmask16 = 0b1001011011000011;
56160 assert_eq!(r, e);
56161 }
56162
56163 #[simd_test(enable = "avx512f")]
56164 unsafe fn test_store_mask16() {
56165 let a: __mmask16 = 0b0110100100111100;
56166 let mut r = 0;
56167 _store_mask16(&mut r, a);
56168 let e: __mmask16 = 0b0110100100111100;
56169 assert_eq!(r, e);
56170 }
56171
56172 #[simd_test(enable = "avx512f")]
56173 unsafe fn test_mm512_kmov() {
56174 let a: u16 = 0b11001100_00110011;
56175 let r = _mm512_kmov(a);
56176 let e: u16 = 0b11001100_00110011;
56177 assert_eq!(r, e);
56178 }
56179
56180 #[simd_test(enable = "avx512f")]
56181 unsafe fn test_mm512_int2mask() {
56182 let a: i32 = 0b11001100_00110011;
56183 let r = _mm512_int2mask(a);
56184 let e: u16 = 0b11001100_00110011;
56185 assert_eq!(r, e);
56186 }
56187
56188 #[simd_test(enable = "avx512f")]
56189 unsafe fn test_mm512_mask2int() {
56190 let k1: __mmask16 = 0b11001100_00110011;
56191 let r = _mm512_mask2int(k1);
56192 let e: i32 = 0b11001100_00110011;
56193 assert_eq!(r, e);
56194 }
56195
56196 #[simd_test(enable = "avx512f")]
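    // `kunpackb` packs the low byte of `b` into bits 0..=7 of the result and the
    // low byte of `a` into bits 8..=15; the high bytes of both inputs are dropped.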
56197 unsafe fn test_mm512_kunpackb() {
56198 let a: u16 = 0b11001100_00110011;
56199 let b: u16 = 0b00101110_00001011;
56200 let r = _mm512_kunpackb(a, b);
56201 let e: u16 = 0b00110011_00001011;
56202 assert_eq!(r, e);
56203 }
56204
56205 #[simd_test(enable = "avx512f")]
56206 unsafe fn test_mm512_kortestc() {
56207 let a: u16 = 0b11001100_00110011;
56208 let b: u16 = 0b00101110_00001011;
56209 let r = _mm512_kortestc(a, b);
56210 assert_eq!(r, 0);
56211 let b: u16 = 0b11111111_11111111;
56212 let r = _mm512_kortestc(a, b);
56213 assert_eq!(r, 1);
56214 }
56215
56216 #[simd_test(enable = "avx512f")]
56217 unsafe fn test_mm512_kortestz() {
56218 let a: u16 = 0b11001100_00110011;
56219 let b: u16 = 0b00101110_00001011;
56220 let r = _mm512_kortestz(a, b);
56221 assert_eq!(r, 0);
56222 let r = _mm512_kortestz(0, 0);
56223 assert_eq!(r, 1);
56224 }
56225
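    // `vptestmd` sets a mask bit when `a & b` is non-zero in that lane;
    // `vptestnmd` (the `testn` variants further below) sets it when the AND is zero.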
56226 #[simd_test(enable = "avx512f")]
56227 unsafe fn test_mm512_test_epi32_mask() {
56228 let a = _mm512_set1_epi32(1 << 0);
56229 let b = _mm512_set1_epi32(1 << 0 | 1 << 1);
56230 let r = _mm512_test_epi32_mask(a, b);
56231 let e: __mmask16 = 0b11111111_11111111;
56232 assert_eq!(r, e);
56233 }
56234
56235 #[simd_test(enable = "avx512f")]
56236 unsafe fn test_mm512_mask_test_epi32_mask() {
56237 let a = _mm512_set1_epi32(1 << 0);
56238 let b = _mm512_set1_epi32(1 << 0 | 1 << 1);
56239 let r = _mm512_mask_test_epi32_mask(0, a, b);
56240 assert_eq!(r, 0);
56241 let r = _mm512_mask_test_epi32_mask(0b11111111_11111111, a, b);
56242 let e: __mmask16 = 0b11111111_11111111;
56243 assert_eq!(r, e);
56244 }
56245
56246 #[simd_test(enable = "avx512f,avx512vl")]
56247 unsafe fn test_mm256_test_epi32_mask() {
56248 let a = _mm256_set1_epi32(1 << 0);
56249 let b = _mm256_set1_epi32(1 << 0 | 1 << 1);
56250 let r = _mm256_test_epi32_mask(a, b);
56251 let e: __mmask8 = 0b11111111;
56252 assert_eq!(r, e);
56253 }
56254
56255 #[simd_test(enable = "avx512f,avx512vl")]
56256 unsafe fn test_mm256_mask_test_epi32_mask() {
56257 let a = _mm256_set1_epi32(1 << 0);
56258 let b = _mm256_set1_epi32(1 << 0 | 1 << 1);
56259 let r = _mm256_mask_test_epi32_mask(0, a, b);
56260 assert_eq!(r, 0);
56261 let r = _mm256_mask_test_epi32_mask(0b11111111, a, b);
56262 let e: __mmask8 = 0b11111111;
56263 assert_eq!(r, e);
56264 }
56265
56266 #[simd_test(enable = "avx512f,avx512vl")]
56267 unsafe fn test_mm_test_epi32_mask() {
56268 let a = _mm_set1_epi32(1 << 0);
56269 let b = _mm_set1_epi32(1 << 0 | 1 << 1);
56270 let r = _mm_test_epi32_mask(a, b);
56271 let e: __mmask8 = 0b00001111;
56272 assert_eq!(r, e);
56273 }
56274
56275 #[simd_test(enable = "avx512f,avx512vl")]
56276 unsafe fn test_mm_mask_test_epi32_mask() {
56277 let a = _mm_set1_epi32(1 << 0);
56278 let b = _mm_set1_epi32(1 << 0 | 1 << 1);
56279 let r = _mm_mask_test_epi32_mask(0, a, b);
56280 assert_eq!(r, 0);
56281 let r = _mm_mask_test_epi32_mask(0b11111111, a, b);
56282 let e: __mmask8 = 0b00001111;
56283 assert_eq!(r, e);
56284 }
56285
56286 #[simd_test(enable = "avx512f")]
56287 unsafe fn test_mm512_testn_epi32_mask() {
56288 let a = _mm512_set1_epi32(1 << 0);
56289 let b = _mm512_set1_epi32(1 << 0 | 1 << 1);
56290 let r = _mm512_testn_epi32_mask(a, b);
56291 let e: __mmask16 = 0b00000000_00000000;
56292 assert_eq!(r, e);
56293 }
56294
56295 #[simd_test(enable = "avx512f")]
56296 unsafe fn test_mm512_mask_testn_epi32_mask() {
56297 let a = _mm512_set1_epi32(1 << 0);
56298 let b = _mm512_set1_epi32(1 << 1);
56299         let r = _mm512_mask_testn_epi32_mask(0, a, b);
56300 assert_eq!(r, 0);
56301 let r = _mm512_mask_testn_epi32_mask(0b11111111_11111111, a, b);
56302 let e: __mmask16 = 0b11111111_11111111;
56303 assert_eq!(r, e);
56304 }
56305
56306 #[simd_test(enable = "avx512f,avx512vl")]
56307 unsafe fn test_mm256_testn_epi32_mask() {
56308 let a = _mm256_set1_epi32(1 << 0);
56309 let b = _mm256_set1_epi32(1 << 1);
56310 let r = _mm256_testn_epi32_mask(a, b);
56311 let e: __mmask8 = 0b11111111;
56312 assert_eq!(r, e);
56313 }
56314
56315 #[simd_test(enable = "avx512f,avx512vl")]
56316 unsafe fn test_mm256_mask_testn_epi32_mask() {
56317 let a = _mm256_set1_epi32(1 << 0);
56318 let b = _mm256_set1_epi32(1 << 1);
56319         let r = _mm256_mask_testn_epi32_mask(0, a, b);
56320 assert_eq!(r, 0);
56321 let r = _mm256_mask_testn_epi32_mask(0b11111111, a, b);
56322 let e: __mmask8 = 0b11111111;
56323 assert_eq!(r, e);
56324 }
56325
56326 #[simd_test(enable = "avx512f,avx512vl")]
56327 unsafe fn test_mm_testn_epi32_mask() {
56328 let a = _mm_set1_epi32(1 << 0);
56329 let b = _mm_set1_epi32(1 << 1);
56330 let r = _mm_testn_epi32_mask(a, b);
56331 let e: __mmask8 = 0b00001111;
56332 assert_eq!(r, e);
56333 }
56334
56335 #[simd_test(enable = "avx512f,avx512vl")]
56336 unsafe fn test_mm_mask_testn_epi32_mask() {
56337 let a = _mm_set1_epi32(1 << 0);
56338 let b = _mm_set1_epi32(1 << 1);
56339         let r = _mm_mask_testn_epi32_mask(0, a, b);
56340 assert_eq!(r, 0);
56341 let r = _mm_mask_testn_epi32_mask(0b11111111, a, b);
56342 let e: __mmask8 = 0b00001111;
56343 assert_eq!(r, e);
56344 }
56345
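    // The streaming (non-temporal) stores require a 64-byte-aligned destination,
    // hence the `#[repr(align(64))]` wrappers; the tests are ignored under Miri,
    // presumably because it does not model non-temporal memory accesses.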
56346 #[simd_test(enable = "avx512f")]
56347 #[cfg_attr(miri, ignore)]
56348 unsafe fn test_mm512_stream_ps() {
56349 #[repr(align(64))]
56350 struct Memory {
56351 pub data: [f32; 16], // 64 bytes
56352 }
56353 let a = _mm512_set1_ps(7.0);
56354 let mut mem = Memory { data: [-1.0; 16] };
56355
56356 _mm512_stream_ps(&mut mem.data[0] as *mut f32, a);
56357 for i in 0..16 {
56358 assert_eq!(mem.data[i], get_m512(a, i));
56359 }
56360 }
56361
56362 #[simd_test(enable = "avx512f")]
56363 #[cfg_attr(miri, ignore)]
56364 unsafe fn test_mm512_stream_pd() {
56365 #[repr(align(64))]
56366 struct Memory {
56367 pub data: [f64; 8],
56368 }
56369 let a = _mm512_set1_pd(7.0);
56370 let mut mem = Memory { data: [-1.0; 8] };
56371
56372 _mm512_stream_pd(&mut mem.data[0] as *mut f64, a);
56373 for i in 0..8 {
56374 assert_eq!(mem.data[i], get_m512d(a, i));
56375 }
56376 }
56377
56378 #[simd_test(enable = "avx512f")]
56379 #[cfg_attr(miri, ignore)]
56380 unsafe fn test_mm512_stream_si512() {
56381 #[repr(align(64))]
56382 struct Memory {
56383 pub data: [i64; 8],
56384 }
56385 let a = _mm512_set1_epi32(7);
56386 let mut mem = Memory { data: [-1; 8] };
56387
56388 _mm512_stream_si512(mem.data.as_mut_ptr().cast(), a);
56389 for i in 0..8 {
56390 assert_eq!(mem.data[i], get_m512i(a, i));
56391 }
56392 }
56393
56394 #[simd_test(enable = "avx512f")]
56395 unsafe fn test_mm512_stream_load_si512() {
56396 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
56397 let r = _mm512_stream_load_si512(core::ptr::addr_of!(a) as *const _);
56398 assert_eq_m512i(a, r);
56399 }
56400
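    // The `reduce_*` helpers fold all 16 lanes into a scalar. The masked variants
    // ignore masked-off lanes by substituting the operation's identity element
    // (0 for add, 1 for mul, as the masked tests below demonstrate) before reducing.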
56401 #[simd_test(enable = "avx512f")]
56402 unsafe fn test_mm512_reduce_add_epi32() {
56403 let a = _mm512_set1_epi32(1);
56404 let e: i32 = _mm512_reduce_add_epi32(a);
56405 assert_eq!(16, e);
56406 }
56407
56408 #[simd_test(enable = "avx512f")]
56409 unsafe fn test_mm512_mask_reduce_add_epi32() {
56410 let a = _mm512_set1_epi32(1);
56411 let e: i32 = _mm512_mask_reduce_add_epi32(0b11111111_00000000, a);
56412 assert_eq!(8, e);
56413 }
56414
56415 #[simd_test(enable = "avx512f")]
56416 unsafe fn test_mm512_reduce_add_ps() {
56417 let a = _mm512_set1_ps(1.);
56418 let e: f32 = _mm512_reduce_add_ps(a);
56419 assert_eq!(16., e);
56420 }
56421
56422 #[simd_test(enable = "avx512f")]
56423 unsafe fn test_mm512_mask_reduce_add_ps() {
56424 let a = _mm512_set1_ps(1.);
56425 let e: f32 = _mm512_mask_reduce_add_ps(0b11111111_00000000, a);
56426 assert_eq!(8., e);
56427 }
56428
56429 #[simd_test(enable = "avx512f")]
56430 unsafe fn test_mm512_reduce_mul_epi32() {
56431 let a = _mm512_set1_epi32(2);
56432 let e: i32 = _mm512_reduce_mul_epi32(a);
56433 assert_eq!(65536, e);
56434 }
56435
56436 #[simd_test(enable = "avx512f")]
56437 unsafe fn test_mm512_mask_reduce_mul_epi32() {
56438 let a = _mm512_set1_epi32(2);
56439 let e: i32 = _mm512_mask_reduce_mul_epi32(0b11111111_00000000, a);
56440 assert_eq!(256, e);
56441 }
56442
56443 #[simd_test(enable = "avx512f")]
56444 unsafe fn test_mm512_reduce_mul_ps() {
56445 let a = _mm512_set1_ps(2.);
56446 let e: f32 = _mm512_reduce_mul_ps(a);
56447 assert_eq!(65536., e);
56448 }
56449
56450 #[simd_test(enable = "avx512f")]
56451 unsafe fn test_mm512_mask_reduce_mul_ps() {
56452 let a = _mm512_set1_ps(2.);
56453 let e: f32 = _mm512_mask_reduce_mul_ps(0b11111111_00000000, a);
56454 assert_eq!(256., e);
56455 }
56456
56457 #[simd_test(enable = "avx512f")]
56458 unsafe fn test_mm512_reduce_max_epi32() {
56459 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56460 let e: i32 = _mm512_reduce_max_epi32(a);
56461 assert_eq!(15, e);
56462 }
56463
56464 #[simd_test(enable = "avx512f")]
56465 unsafe fn test_mm512_mask_reduce_max_epi32() {
56466 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56467 let e: i32 = _mm512_mask_reduce_max_epi32(0b11111111_00000000, a);
56468 assert_eq!(7, e);
56469 }
56470
56471 #[simd_test(enable = "avx512f")]
56472 unsafe fn test_mm512_reduce_max_epu32() {
56473 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56474 let e: u32 = _mm512_reduce_max_epu32(a);
56475 assert_eq!(15, e);
56476 }
56477
56478 #[simd_test(enable = "avx512f")]
56479 unsafe fn test_mm512_mask_reduce_max_epu32() {
56480 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56481 let e: u32 = _mm512_mask_reduce_max_epu32(0b11111111_00000000, a);
56482 assert_eq!(7, e);
56483 }
56484
56485 #[simd_test(enable = "avx512f")]
56486 unsafe fn test_mm512_reduce_max_ps() {
56487 let a = _mm512_set_ps(
56488 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
56489 );
56490 let e: f32 = _mm512_reduce_max_ps(a);
56491 assert_eq!(15., e);
56492 }
56493
56494 #[simd_test(enable = "avx512f")]
56495 unsafe fn test_mm512_mask_reduce_max_ps() {
56496 let a = _mm512_set_ps(
56497 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
56498 );
56499 let e: f32 = _mm512_mask_reduce_max_ps(0b11111111_00000000, a);
56500 assert_eq!(7., e);
56501 }
56502
56503 #[simd_test(enable = "avx512f")]
56504 unsafe fn test_mm512_reduce_min_epi32() {
56505 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56506 let e: i32 = _mm512_reduce_min_epi32(a);
56507 assert_eq!(0, e);
56508 }
56509
56510 #[simd_test(enable = "avx512f")]
56511 unsafe fn test_mm512_mask_reduce_min_epi32() {
56512 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56513 let e: i32 = _mm512_mask_reduce_min_epi32(0b11111111_00000000, a);
56514 assert_eq!(0, e);
56515 }
56516
56517 #[simd_test(enable = "avx512f")]
56518 unsafe fn test_mm512_reduce_min_epu32() {
56519 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56520 let e: u32 = _mm512_reduce_min_epu32(a);
56521 assert_eq!(0, e);
56522 }
56523
56524 #[simd_test(enable = "avx512f")]
56525 unsafe fn test_mm512_mask_reduce_min_epu32() {
56526 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56527 let e: u32 = _mm512_mask_reduce_min_epu32(0b11111111_00000000, a);
56528 assert_eq!(0, e);
56529 }
56530
56531 #[simd_test(enable = "avx512f")]
56532 unsafe fn test_mm512_reduce_min_ps() {
56533 let a = _mm512_set_ps(
56534 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
56535 );
56536 let e: f32 = _mm512_reduce_min_ps(a);
56537 assert_eq!(0., e);
56538 }
56539
56540 #[simd_test(enable = "avx512f")]
56541 unsafe fn test_mm512_mask_reduce_min_ps() {
56542 let a = _mm512_set_ps(
56543 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
56544 );
56545 let e: f32 = _mm512_mask_reduce_min_ps(0b11111111_00000000, a);
56546 assert_eq!(0., e);
56547 }
56548
56549 #[simd_test(enable = "avx512f")]
56550 unsafe fn test_mm512_reduce_and_epi32() {
56551 let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
56552 let e: i32 = _mm512_reduce_and_epi32(a);
56553 assert_eq!(0, e);
56554 }
56555
56556 #[simd_test(enable = "avx512f")]
56557 unsafe fn test_mm512_mask_reduce_and_epi32() {
56558 let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
56559 let e: i32 = _mm512_mask_reduce_and_epi32(0b11111111_00000000, a);
56560 assert_eq!(1, e);
56561 }
56562
56563 #[simd_test(enable = "avx512f")]
56564 unsafe fn test_mm512_reduce_or_epi32() {
56565 let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
56566 let e: i32 = _mm512_reduce_or_epi32(a);
56567 assert_eq!(3, e);
56568 }
56569
56570 #[simd_test(enable = "avx512f")]
56571 unsafe fn test_mm512_mask_reduce_or_epi32() {
56572 let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
56573         let e: i32 = _mm512_mask_reduce_or_epi32(0b11111111_00000000, a);
56574 assert_eq!(1, e);
56575 }
56576
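    // `vpcompressd` packs the elements selected by the mask contiguously into the
    // low lanes of the result; remaining lanes take `src` (mask variant) or zero
    // (maskz variant).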
56577 #[simd_test(enable = "avx512f")]
56578 unsafe fn test_mm512_mask_compress_epi32() {
56579 let src = _mm512_set1_epi32(200);
56580 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56581 let r = _mm512_mask_compress_epi32(src, 0, a);
56582 assert_eq_m512i(r, src);
56583 let r = _mm512_mask_compress_epi32(src, 0b01010101_01010101, a);
56584 let e = _mm512_set_epi32(
56585 200, 200, 200, 200, 200, 200, 200, 200, 1, 3, 5, 7, 9, 11, 13, 15,
56586 );
56587 assert_eq_m512i(r, e);
56588 }
56589
56590 #[simd_test(enable = "avx512f")]
56591 unsafe fn test_mm512_maskz_compress_epi32() {
56592 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56593 let r = _mm512_maskz_compress_epi32(0, a);
56594 assert_eq_m512i(r, _mm512_setzero_si512());
56595 let r = _mm512_maskz_compress_epi32(0b01010101_01010101, a);
56596 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 5, 7, 9, 11, 13, 15);
56597 assert_eq_m512i(r, e);
56598 }
56599
56600 #[simd_test(enable = "avx512f,avx512vl")]
56601 unsafe fn test_mm256_mask_compress_epi32() {
56602 let src = _mm256_set1_epi32(200);
56603 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
56604 let r = _mm256_mask_compress_epi32(src, 0, a);
56605 assert_eq_m256i(r, src);
56606 let r = _mm256_mask_compress_epi32(src, 0b01010101, a);
56607 let e = _mm256_set_epi32(200, 200, 200, 200, 1, 3, 5, 7);
56608 assert_eq_m256i(r, e);
56609 }
56610
56611 #[simd_test(enable = "avx512f,avx512vl")]
56612 unsafe fn test_mm256_maskz_compress_epi32() {
56613 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
56614 let r = _mm256_maskz_compress_epi32(0, a);
56615 assert_eq_m256i(r, _mm256_setzero_si256());
56616 let r = _mm256_maskz_compress_epi32(0b01010101, a);
56617 let e = _mm256_set_epi32(0, 0, 0, 0, 1, 3, 5, 7);
56618 assert_eq_m256i(r, e);
56619 }
56620
56621 #[simd_test(enable = "avx512f,avx512vl")]
56622 unsafe fn test_mm_mask_compress_epi32() {
56623 let src = _mm_set1_epi32(200);
56624 let a = _mm_set_epi32(0, 1, 2, 3);
56625 let r = _mm_mask_compress_epi32(src, 0, a);
56626 assert_eq_m128i(r, src);
56627 let r = _mm_mask_compress_epi32(src, 0b00000101, a);
56628 let e = _mm_set_epi32(200, 200, 1, 3);
56629 assert_eq_m128i(r, e);
56630 }
56631
56632 #[simd_test(enable = "avx512f,avx512vl")]
56633 unsafe fn test_mm_maskz_compress_epi32() {
56634 let a = _mm_set_epi32(0, 1, 2, 3);
56635 let r = _mm_maskz_compress_epi32(0, a);
56636 assert_eq_m128i(r, _mm_setzero_si128());
56637 let r = _mm_maskz_compress_epi32(0b00000101, a);
56638 let e = _mm_set_epi32(0, 0, 1, 3);
56639 assert_eq_m128i(r, e);
56640 }
56641
56642 #[simd_test(enable = "avx512f")]
56643 unsafe fn test_mm512_mask_compress_ps() {
56644 let src = _mm512_set1_ps(200.);
56645 let a = _mm512_set_ps(
56646 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
56647 );
56648 let r = _mm512_mask_compress_ps(src, 0, a);
56649 assert_eq_m512(r, src);
56650 let r = _mm512_mask_compress_ps(src, 0b01010101_01010101, a);
56651 let e = _mm512_set_ps(
56652 200., 200., 200., 200., 200., 200., 200., 200., 1., 3., 5., 7., 9., 11., 13., 15.,
56653 );
56654 assert_eq_m512(r, e);
56655 }
56656
56657 #[simd_test(enable = "avx512f")]
56658 unsafe fn test_mm512_maskz_compress_ps() {
56659 let a = _mm512_set_ps(
56660 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
56661 );
56662 let r = _mm512_maskz_compress_ps(0, a);
56663 assert_eq_m512(r, _mm512_setzero_ps());
56664 let r = _mm512_maskz_compress_ps(0b01010101_01010101, a);
56665 let e = _mm512_set_ps(
56666 0., 0., 0., 0., 0., 0., 0., 0., 1., 3., 5., 7., 9., 11., 13., 15.,
56667 );
56668 assert_eq_m512(r, e);
56669 }
56670
56671 #[simd_test(enable = "avx512f,avx512vl")]
56672 unsafe fn test_mm256_mask_compress_ps() {
56673 let src = _mm256_set1_ps(200.);
56674 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
56675 let r = _mm256_mask_compress_ps(src, 0, a);
56676 assert_eq_m256(r, src);
56677 let r = _mm256_mask_compress_ps(src, 0b01010101, a);
56678 let e = _mm256_set_ps(200., 200., 200., 200., 1., 3., 5., 7.);
56679 assert_eq_m256(r, e);
56680 }
56681
56682 #[simd_test(enable = "avx512f,avx512vl")]
56683 unsafe fn test_mm256_maskz_compress_ps() {
56684 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
56685 let r = _mm256_maskz_compress_ps(0, a);
56686 assert_eq_m256(r, _mm256_setzero_ps());
56687 let r = _mm256_maskz_compress_ps(0b01010101, a);
56688 let e = _mm256_set_ps(0., 0., 0., 0., 1., 3., 5., 7.);
56689 assert_eq_m256(r, e);
56690 }
56691
56692 #[simd_test(enable = "avx512f,avx512vl")]
56693 unsafe fn test_mm_mask_compress_ps() {
56694 let src = _mm_set1_ps(200.);
56695 let a = _mm_set_ps(0., 1., 2., 3.);
56696 let r = _mm_mask_compress_ps(src, 0, a);
56697 assert_eq_m128(r, src);
56698 let r = _mm_mask_compress_ps(src, 0b00000101, a);
56699 let e = _mm_set_ps(200., 200., 1., 3.);
56700 assert_eq_m128(r, e);
56701 }
56702
56703 #[simd_test(enable = "avx512f,avx512vl")]
56704 unsafe fn test_mm_maskz_compress_ps() {
56705 let a = _mm_set_ps(0., 1., 2., 3.);
56706 let r = _mm_maskz_compress_ps(0, a);
56707 assert_eq_m128(r, _mm_setzero_ps());
56708 let r = _mm_maskz_compress_ps(0b00000101, a);
56709 let e = _mm_set_ps(0., 0., 1., 3.);
56710 assert_eq_m128(r, e);
56711 }
56712
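    // `compressstoreu` writes only as many elements as there are set mask bits,
    // contiguously and without alignment requirements; the rest of the destination
    // buffer is left untouched, hence the zero padding in the expectations below.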
56713 #[simd_test(enable = "avx512f")]
56714 unsafe fn test_mm512_mask_compressstoreu_epi32() {
56715 let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
56716 let mut r = [0_i32; 16];
56717 _mm512_mask_compressstoreu_epi32(r.as_mut_ptr() as *mut _, 0, a);
56718 assert_eq!(&r, &[0_i32; 16]);
56719 _mm512_mask_compressstoreu_epi32(r.as_mut_ptr() as *mut _, 0b1111000011001010, a);
56720 assert_eq!(&r, &[2, 4, 7, 8, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0]);
56721 }
56722
56723 #[simd_test(enable = "avx512f,avx512vl")]
56724 unsafe fn test_mm256_mask_compressstoreu_epi32() {
56725 let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
56726 let mut r = [0_i32; 8];
56727 _mm256_mask_compressstoreu_epi32(r.as_mut_ptr() as *mut _, 0, a);
56728 assert_eq!(&r, &[0_i32; 8]);
56729 _mm256_mask_compressstoreu_epi32(r.as_mut_ptr() as *mut _, 0b11001010, a);
56730 assert_eq!(&r, &[2, 4, 7, 8, 0, 0, 0, 0]);
56731 }
56732
56733 #[simd_test(enable = "avx512f,avx512vl")]
56734 unsafe fn test_mm_mask_compressstoreu_epi32() {
56735 let a = _mm_setr_epi32(1, 2, 3, 4);
56736 let mut r = [0_i32; 4];
56737 _mm_mask_compressstoreu_epi32(r.as_mut_ptr() as *mut _, 0, a);
56738 assert_eq!(&r, &[0_i32; 4]);
56739 _mm_mask_compressstoreu_epi32(r.as_mut_ptr() as *mut _, 0b1011, a);
56740 assert_eq!(&r, &[1, 2, 4, 0]);
56741 }
56742
56743 #[simd_test(enable = "avx512f")]
56744 unsafe fn test_mm512_mask_compressstoreu_epi64() {
56745 let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
56746 let mut r = [0_i64; 8];
56747 _mm512_mask_compressstoreu_epi64(r.as_mut_ptr() as *mut _, 0, a);
56748 assert_eq!(&r, &[0_i64; 8]);
56749 _mm512_mask_compressstoreu_epi64(r.as_mut_ptr() as *mut _, 0b11001010, a);
56750 assert_eq!(&r, &[2, 4, 7, 8, 0, 0, 0, 0]);
56751 }
56752
56753 #[simd_test(enable = "avx512f,avx512vl")]
56754 unsafe fn test_mm256_mask_compressstoreu_epi64() {
56755 let a = _mm256_setr_epi64x(1, 2, 3, 4);
56756 let mut r = [0_i64; 4];
56757 _mm256_mask_compressstoreu_epi64(r.as_mut_ptr() as *mut _, 0, a);
56758 assert_eq!(&r, &[0_i64; 4]);
56759 _mm256_mask_compressstoreu_epi64(r.as_mut_ptr() as *mut _, 0b1011, a);
56760 assert_eq!(&r, &[1, 2, 4, 0]);
56761 }
56762
56763 #[simd_test(enable = "avx512f,avx512vl")]
56764 unsafe fn test_mm_mask_compressstoreu_epi64() {
56765 let a = _mm_setr_epi64x(1, 2);
56766 let mut r = [0_i64; 2];
56767 _mm_mask_compressstoreu_epi64(r.as_mut_ptr() as *mut _, 0, a);
56768 assert_eq!(&r, &[0_i64; 2]);
56769 _mm_mask_compressstoreu_epi64(r.as_mut_ptr() as *mut _, 0b10, a);
56770 assert_eq!(&r, &[2, 0]);
56771 }
56772
56773 #[simd_test(enable = "avx512f")]
56774 unsafe fn test_mm512_mask_compressstoreu_ps() {
56775 let a = _mm512_setr_ps(
56776 1_f32, 2_f32, 3_f32, 4_f32, 5_f32, 6_f32, 7_f32, 8_f32, 9_f32, 10_f32, 11_f32, 12_f32,
56777 13_f32, 14_f32, 15_f32, 16_f32,
56778 );
56779 let mut r = [0_f32; 16];
56780 _mm512_mask_compressstoreu_ps(r.as_mut_ptr() as *mut _, 0, a);
56781 assert_eq!(&r, &[0_f32; 16]);
56782 _mm512_mask_compressstoreu_ps(r.as_mut_ptr() as *mut _, 0b1111000011001010, a);
56783 assert_eq!(
56784 &r,
56785 &[
56786 2_f32, 4_f32, 7_f32, 8_f32, 13_f32, 14_f32, 15_f32, 16_f32, 0_f32, 0_f32, 0_f32,
56787 0_f32, 0_f32, 0_f32, 0_f32, 0_f32
56788 ]
56789 );
56790 }
56791
56792 #[simd_test(enable = "avx512f,avx512vl")]
56793 unsafe fn test_mm256_mask_compressstoreu_ps() {
56794 let a = _mm256_setr_ps(1_f32, 2_f32, 3_f32, 4_f32, 5_f32, 6_f32, 7_f32, 8_f32);
56795 let mut r = [0_f32; 8];
56796 _mm256_mask_compressstoreu_ps(r.as_mut_ptr() as *mut _, 0, a);
56797 assert_eq!(&r, &[0_f32; 8]);
56798 _mm256_mask_compressstoreu_ps(r.as_mut_ptr() as *mut _, 0b11001010, a);
56799 assert_eq!(
56800 &r,
56801 &[2_f32, 4_f32, 7_f32, 8_f32, 0_f32, 0_f32, 0_f32, 0_f32]
56802 );
56803 }
56804
56805 #[simd_test(enable = "avx512f,avx512vl")]
56806 unsafe fn test_mm_mask_compressstoreu_ps() {
56807 let a = _mm_setr_ps(1_f32, 2_f32, 3_f32, 4_f32);
56808 let mut r = [0.; 4];
56809 _mm_mask_compressstoreu_ps(r.as_mut_ptr() as *mut _, 0, a);
56810 assert_eq!(&r, &[0.; 4]);
56811 _mm_mask_compressstoreu_ps(r.as_mut_ptr() as *mut _, 0b1011, a);
56812 assert_eq!(&r, &[1_f32, 2_f32, 4_f32, 0_f32]);
56813 }
56814
56815 #[simd_test(enable = "avx512f")]
56816 unsafe fn test_mm512_mask_compressstoreu_pd() {
56817 let a = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.);
56818 let mut r = [0.; 8];
56819 _mm512_mask_compressstoreu_pd(r.as_mut_ptr() as *mut _, 0, a);
56820 assert_eq!(&r, &[0.; 8]);
56821 _mm512_mask_compressstoreu_pd(r.as_mut_ptr() as *mut _, 0b11001010, a);
56822 assert_eq!(&r, &[2., 4., 7., 8., 0., 0., 0., 0.]);
56823 }
56824
56825 #[simd_test(enable = "avx512f,avx512vl")]
56826 unsafe fn test_mm256_mask_compressstoreu_pd() {
56827 let a = _mm256_setr_pd(1., 2., 3., 4.);
56828 let mut r = [0.; 4];
56829 _mm256_mask_compressstoreu_pd(r.as_mut_ptr() as *mut _, 0, a);
56830 assert_eq!(&r, &[0.; 4]);
56831 _mm256_mask_compressstoreu_pd(r.as_mut_ptr() as *mut _, 0b1011, a);
56832 assert_eq!(&r, &[1., 2., 4., 0.]);
56833 }
56834
56835 #[simd_test(enable = "avx512f,avx512vl")]
56836 unsafe fn test_mm_mask_compressstoreu_pd() {
56837 let a = _mm_setr_pd(1., 2.);
56838 let mut r = [0.; 2];
56839 _mm_mask_compressstoreu_pd(r.as_mut_ptr() as *mut _, 0, a);
56840 assert_eq!(&r, &[0.; 2]);
56841 _mm_mask_compressstoreu_pd(r.as_mut_ptr() as *mut _, 0b10, a);
56842 assert_eq!(&r, &[2., 0.]);
56843 }
56844
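    // `vpexpandd` is the inverse of compress: it reads contiguous elements from the
    // low end of `a` and scatters them to the positions of the set mask bits, with
    // the other positions taken from `src` (mask) or zeroed (maskz).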
56845 #[simd_test(enable = "avx512f")]
56846 unsafe fn test_mm512_mask_expand_epi32() {
56847 let src = _mm512_set1_epi32(200);
56848 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56849 let r = _mm512_mask_expand_epi32(src, 0, a);
56850 assert_eq_m512i(r, src);
56851 let r = _mm512_mask_expand_epi32(src, 0b01010101_01010101, a);
56852 let e = _mm512_set_epi32(
56853 200, 8, 200, 9, 200, 10, 200, 11, 200, 12, 200, 13, 200, 14, 200, 15,
56854 );
56855 assert_eq_m512i(r, e);
56856 }
56857
56858 #[simd_test(enable = "avx512f")]
56859 unsafe fn test_mm512_maskz_expand_epi32() {
56860 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56861 let r = _mm512_maskz_expand_epi32(0, a);
56862 assert_eq_m512i(r, _mm512_setzero_si512());
56863 let r = _mm512_maskz_expand_epi32(0b01010101_01010101, a);
56864 let e = _mm512_set_epi32(0, 8, 0, 9, 0, 10, 0, 11, 0, 12, 0, 13, 0, 14, 0, 15);
56865 assert_eq_m512i(r, e);
56866 }
56867
56868 #[simd_test(enable = "avx512f,avx512vl")]
56869 unsafe fn test_mm256_mask_expand_epi32() {
56870 let src = _mm256_set1_epi32(200);
56871 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
56872 let r = _mm256_mask_expand_epi32(src, 0, a);
56873 assert_eq_m256i(r, src);
56874 let r = _mm256_mask_expand_epi32(src, 0b01010101, a);
56875 let e = _mm256_set_epi32(200, 4, 200, 5, 200, 6, 200, 7);
56876 assert_eq_m256i(r, e);
56877 }
56878
56879 #[simd_test(enable = "avx512f,avx512vl")]
56880 unsafe fn test_mm256_maskz_expand_epi32() {
56881 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
56882 let r = _mm256_maskz_expand_epi32(0, a);
56883 assert_eq_m256i(r, _mm256_setzero_si256());
56884 let r = _mm256_maskz_expand_epi32(0b01010101, a);
56885 let e = _mm256_set_epi32(0, 4, 0, 5, 0, 6, 0, 7);
56886 assert_eq_m256i(r, e);
56887 }
56888
56889 #[simd_test(enable = "avx512f,avx512vl")]
56890 unsafe fn test_mm_mask_expand_epi32() {
56891 let src = _mm_set1_epi32(200);
56892 let a = _mm_set_epi32(0, 1, 2, 3);
56893 let r = _mm_mask_expand_epi32(src, 0, a);
56894 assert_eq_m128i(r, src);
56895 let r = _mm_mask_expand_epi32(src, 0b00000101, a);
56896 let e = _mm_set_epi32(200, 2, 200, 3);
56897 assert_eq_m128i(r, e);
56898 }
56899
56900 #[simd_test(enable = "avx512f,avx512vl")]
56901 unsafe fn test_mm_maskz_expand_epi32() {
56902 let a = _mm_set_epi32(0, 1, 2, 3);
56903 let r = _mm_maskz_expand_epi32(0, a);
56904 assert_eq_m128i(r, _mm_setzero_si128());
56905 let r = _mm_maskz_expand_epi32(0b00000101, a);
56906 let e = _mm_set_epi32(0, 2, 0, 3);
56907 assert_eq_m128i(r, e);
56908 }
56909
56910 #[simd_test(enable = "avx512f")]
56911 unsafe fn test_mm512_mask_expand_ps() {
56912 let src = _mm512_set1_ps(200.);
56913 let a = _mm512_set_ps(
56914 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
56915 );
56916 let r = _mm512_mask_expand_ps(src, 0, a);
56917 assert_eq_m512(r, src);
56918 let r = _mm512_mask_expand_ps(src, 0b01010101_01010101, a);
56919 let e = _mm512_set_ps(
56920 200., 8., 200., 9., 200., 10., 200., 11., 200., 12., 200., 13., 200., 14., 200., 15.,
56921 );
56922 assert_eq_m512(r, e);
56923 }
56924
56925 #[simd_test(enable = "avx512f")]
56926 unsafe fn test_mm512_maskz_expand_ps() {
56927 let a = _mm512_set_ps(
56928 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
56929 );
56930 let r = _mm512_maskz_expand_ps(0, a);
56931 assert_eq_m512(r, _mm512_setzero_ps());
56932 let r = _mm512_maskz_expand_ps(0b01010101_01010101, a);
56933 let e = _mm512_set_ps(
56934 0., 8., 0., 9., 0., 10., 0., 11., 0., 12., 0., 13., 0., 14., 0., 15.,
56935 );
56936 assert_eq_m512(r, e);
56937 }
56938
56939 #[simd_test(enable = "avx512f,avx512vl")]
56940 unsafe fn test_mm256_mask_expand_ps() {
56941 let src = _mm256_set1_ps(200.);
56942 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
56943 let r = _mm256_mask_expand_ps(src, 0, a);
56944 assert_eq_m256(r, src);
56945 let r = _mm256_mask_expand_ps(src, 0b01010101, a);
56946 let e = _mm256_set_ps(200., 4., 200., 5., 200., 6., 200., 7.);
56947 assert_eq_m256(r, e);
56948 }
56949
56950 #[simd_test(enable = "avx512f,avx512vl")]
56951 unsafe fn test_mm256_maskz_expand_ps() {
56952 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
56953 let r = _mm256_maskz_expand_ps(0, a);
56954 assert_eq_m256(r, _mm256_setzero_ps());
56955 let r = _mm256_maskz_expand_ps(0b01010101, a);
56956 let e = _mm256_set_ps(0., 4., 0., 5., 0., 6., 0., 7.);
56957 assert_eq_m256(r, e);
56958 }
56959
56960 #[simd_test(enable = "avx512f,avx512vl")]
56961 unsafe fn test_mm_mask_expand_ps() {
56962 let src = _mm_set1_ps(200.);
56963 let a = _mm_set_ps(0., 1., 2., 3.);
56964 let r = _mm_mask_expand_ps(src, 0, a);
56965 assert_eq_m128(r, src);
56966 let r = _mm_mask_expand_ps(src, 0b00000101, a);
56967 let e = _mm_set_ps(200., 2., 200., 3.);
56968 assert_eq_m128(r, e);
56969 }
56970
56971 #[simd_test(enable = "avx512f,avx512vl")]
56972 unsafe fn test_mm_maskz_expand_ps() {
56973 let a = _mm_set_ps(0., 1., 2., 3.);
56974 let r = _mm_maskz_expand_ps(0, a);
56975 assert_eq_m128(r, _mm_setzero_ps());
56976 let r = _mm_maskz_expand_ps(0b00000101, a);
56977 let e = _mm_set_ps(0., 2., 0., 3.);
56978 assert_eq_m128(r, e);
56979 }
56980
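    // The unaligned-load tests read straight from a slice; `black_box` keeps the
    // pointer opaque so the load cannot be constant-folded away.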
56981 #[simd_test(enable = "avx512f")]
56982 unsafe fn test_mm512_loadu_epi32() {
56983 let a = &[4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50];
56984 let p = a.as_ptr();
56985 let r = _mm512_loadu_epi32(black_box(p));
56986 let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
56987 assert_eq_m512i(r, e);
56988 }
56989
56990 #[simd_test(enable = "avx512f,avx512vl")]
56991 unsafe fn test_mm256_loadu_epi32() {
56992 let a = &[4, 3, 2, 5, 8, 9, 64, 50];
56993 let p = a.as_ptr();
56994 let r = _mm256_loadu_epi32(black_box(p));
56995 let e = _mm256_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50);
56996 assert_eq_m256i(r, e);
56997 }
56998
56999 #[simd_test(enable = "avx512f,avx512vl")]
57000 unsafe fn test_mm_loadu_epi32() {
57001 let a = &[4, 3, 2, 5];
57002 let p = a.as_ptr();
57003 let r = _mm_loadu_epi32(black_box(p));
57004 let e = _mm_setr_epi32(4, 3, 2, 5);
57005 assert_eq_m128i(r, e);
57006 }
57007
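    // The `cvt*_storeu_epi16`/`_epi8` tests narrow each 32-bit lane and store the
    // packed result: plain `cvtepi32` truncates, `cvtsepi32` saturates as signed
    // (i32::MAX -> i16::MAX), and `cvtusepi32` saturates as unsigned
    // (i32::MAX -> u16::MAX).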
57008 #[simd_test(enable = "avx512f")]
57009 unsafe fn test_mm512_mask_cvtepi32_storeu_epi16() {
57010 let a = _mm512_set1_epi32(9);
57011 let mut r = _mm256_undefined_si256();
57012 _mm512_mask_cvtepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
57013 let e = _mm256_set1_epi16(9);
57014 assert_eq_m256i(r, e);
57015 }
57016
57017 #[simd_test(enable = "avx512f,avx512vl")]
57018 unsafe fn test_mm256_mask_cvtepi32_storeu_epi16() {
57019 let a = _mm256_set1_epi32(9);
57020 let mut r = _mm_undefined_si128();
57021 _mm256_mask_cvtepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
57022 let e = _mm_set1_epi16(9);
57023 assert_eq_m128i(r, e);
57024 }
57025
57026 #[simd_test(enable = "avx512f,avx512vl")]
57027 unsafe fn test_mm_mask_cvtepi32_storeu_epi16() {
57028 let a = _mm_set1_epi32(9);
57029 let mut r = _mm_set1_epi8(0);
57030 _mm_mask_cvtepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
57031 let e = _mm_set_epi16(0, 0, 0, 0, 9, 9, 9, 9);
57032 assert_eq_m128i(r, e);
57033 }
57034
57035 #[simd_test(enable = "avx512f")]
57036 unsafe fn test_mm512_mask_cvtsepi32_storeu_epi16() {
57037 let a = _mm512_set1_epi32(i32::MAX);
57038 let mut r = _mm256_undefined_si256();
57039 _mm512_mask_cvtsepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
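// signed saturation: each i32::MAX lane narrows to i16::MAX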
57040 let e = _mm256_set1_epi16(i16::MAX);
57041 assert_eq_m256i(r, e);
57042 }
57043
57044 #[simd_test(enable = "avx512f,avx512vl")]
57045 unsafe fn test_mm256_mask_cvtsepi32_storeu_epi16() {
57046 let a = _mm256_set1_epi32(i32::MAX);
57047 let mut r = _mm_undefined_si128();
57048 _mm256_mask_cvtsepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
57049 let e = _mm_set1_epi16(i16::MAX);
57050 assert_eq_m128i(r, e);
57051 }
57052
57053 #[simd_test(enable = "avx512f,avx512vl")]
57054 unsafe fn test_mm_mask_cvtsepi32_storeu_epi16() {
57055 let a = _mm_set1_epi32(i32::MAX);
57056 let mut r = _mm_set1_epi8(0);
57057 _mm_mask_cvtsepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
57058 let e = _mm_set_epi16(0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
57059 assert_eq_m128i(r, e);
57060 }
57061
57062 #[simd_test(enable = "avx512f")]
57063 unsafe fn test_mm512_mask_cvtusepi32_storeu_epi16() {
57064 let a = _mm512_set1_epi32(i32::MAX);
57065 let mut r = _mm256_undefined_si256();
57066 _mm512_mask_cvtusepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
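// unsigned saturation: each i32::MAX lane narrows to u16::MAX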
57067 let e = _mm256_set1_epi16(u16::MAX as i16);
57068 assert_eq_m256i(r, e);
57069 }
57070
57071 #[simd_test(enable = "avx512f,avx512vl")]
57072 unsafe fn test_mm256_mask_cvtusepi32_storeu_epi16() {
57073 let a = _mm256_set1_epi32(i32::MAX);
57074 let mut r = _mm_undefined_si128();
57075 _mm256_mask_cvtusepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
57076 let e = _mm_set1_epi16(u16::MAX as i16);
57077 assert_eq_m128i(r, e);
57078 }
57079
57080 #[simd_test(enable = "avx512f,avx512vl")]
57081 unsafe fn test_mm_mask_cvtusepi32_storeu_epi16() {
57082 let a = _mm_set1_epi32(i32::MAX);
57083 let mut r = _mm_set1_epi8(0);
57084 _mm_mask_cvtusepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
57085 let e = _mm_set_epi16(
57086 0,
57087 0,
57088 0,
57089 0,
57090 u16::MAX as i16,
57091 u16::MAX as i16,
57092 u16::MAX as i16,
57093 u16::MAX as i16,
57094 );
57095 assert_eq_m128i(r, e);
57096 }
57097
57098 #[simd_test(enable = "avx512f")]
57099 unsafe fn test_mm512_mask_cvtepi32_storeu_epi8() {
57100 let a = _mm512_set1_epi32(9);
57101 let mut r = _mm_undefined_si128();
57102 _mm512_mask_cvtepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
57103 let e = _mm_set1_epi8(9);
57104 assert_eq_m128i(r, e);
57105 }
57106
57107 #[simd_test(enable = "avx512f,avx512vl")]
57108 unsafe fn test_mm256_mask_cvtepi32_storeu_epi8() {
57109 let a = _mm256_set1_epi32(9);
57110 let mut r = _mm_set1_epi8(0);
57111 _mm256_mask_cvtepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
57112 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9, 9, 9, 9, 9);
57113 assert_eq_m128i(r, e);
57114 }
57115
57116 #[simd_test(enable = "avx512f,avx512vl")]
57117 unsafe fn test_mm_mask_cvtepi32_storeu_epi8() {
57118 let a = _mm_set1_epi32(9);
57119 let mut r = _mm_set1_epi8(0);
57120 _mm_mask_cvtepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
57121 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9);
57122 assert_eq_m128i(r, e);
57123 }
57124
57125 #[simd_test(enable = "avx512f")]
57126 unsafe fn test_mm512_mask_cvtsepi32_storeu_epi8() {
57127 let a = _mm512_set1_epi32(i32::MAX);
57128 let mut r = _mm_undefined_si128();
57129 _mm512_mask_cvtsepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
57130 let e = _mm_set1_epi8(i8::MAX);
57131 assert_eq_m128i(r, e);
57132 }
57133
57134 #[simd_test(enable = "avx512f,avx512vl")]
57135 unsafe fn test_mm256_mask_cvtsepi32_storeu_epi8() {
57136 let a = _mm256_set1_epi32(i32::MAX);
57137 let mut r = _mm_set1_epi8(0);
57138 _mm256_mask_cvtsepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
57139 #[rustfmt::skip]
57140 let e = _mm_set_epi8(
57141 0, 0, 0, 0,
57142 0, 0, 0, 0,
57143 i8::MAX, i8::MAX, i8::MAX, i8::MAX,
57144 i8::MAX, i8::MAX, i8::MAX, i8::MAX,
57145 );
57146 assert_eq_m128i(r, e);
57147 }
57148
57149 #[simd_test(enable = "avx512f,avx512vl")]
57150 unsafe fn test_mm_mask_cvtsepi32_storeu_epi8() {
57151 let a = _mm_set1_epi32(i32::MAX);
57152 let mut r = _mm_set1_epi8(0);
57153 _mm_mask_cvtsepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
57154 #[rustfmt::skip]
57155 let e = _mm_set_epi8(
57156 0, 0, 0, 0,
57157 0, 0, 0, 0,
57158 0, 0, 0, 0,
57159 i8::MAX, i8::MAX, i8::MAX, i8::MAX,
57160 );
57161 assert_eq_m128i(r, e);
57162 }
57163
57164 #[simd_test(enable = "avx512f")]
57165 unsafe fn test_mm512_mask_cvtusepi32_storeu_epi8() {
57166 let a = _mm512_set1_epi32(i32::MAX);
57167 let mut r = _mm_undefined_si128();
57168 _mm512_mask_cvtusepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
57169 let e = _mm_set1_epi8(u8::MAX as i8);
57170 assert_eq_m128i(r, e);
57171 }
57172
57173 #[simd_test(enable = "avx512f,avx512vl")]
57174 unsafe fn test_mm256_mask_cvtusepi32_storeu_epi8() {
57175 let a = _mm256_set1_epi32(i32::MAX);
57176 let mut r = _mm_set1_epi8(0);
57177 _mm256_mask_cvtusepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
57178 #[rustfmt::skip]
57179 let e = _mm_set_epi8(
57180 0, 0, 0, 0,
57181 0, 0, 0, 0,
57182 u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
57183 u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
57184 );
57185 assert_eq_m128i(r, e);
57186 }
57187
57188 #[simd_test(enable = "avx512f,avx512vl")]
57189 unsafe fn test_mm_mask_cvtusepi32_storeu_epi8() {
57190 let a = _mm_set1_epi32(i32::MAX);
57191 let mut r = _mm_set1_epi8(0);
57192 _mm_mask_cvtusepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
57193 #[rustfmt::skip]
57194 let e = _mm_set_epi8(
57195 0, 0, 0, 0,
57196 0, 0, 0, 0,
57197 0, 0, 0, 0,
57198 u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
57199 );
57200 assert_eq_m128i(r, e);
57201 }
57202
57203 #[simd_test(enable = "avx512f")]
57204 unsafe fn test_mm512_storeu_epi32() {
57205 let a = _mm512_set1_epi32(9);
57206 let mut r = _mm512_undefined_epi32();
57207 _mm512_storeu_epi32(&mut r as *mut _ as *mut i32, a);
57208 assert_eq_m512i(r, a);
57209 }
57210
57211 #[simd_test(enable = "avx512f,avx512vl")]
57212 unsafe fn test_mm256_storeu_epi32() {
57213 let a = _mm256_set1_epi32(9);
57214 let mut r = _mm256_undefined_si256();
57215 _mm256_storeu_epi32(&mut r as *mut _ as *mut i32, a);
57216 assert_eq_m256i(r, a);
57217 }
57218
57219 #[simd_test(enable = "avx512f,avx512vl")]
57220 unsafe fn test_mm_storeu_epi32() {
57221 let a = _mm_set1_epi32(9);
57222 let mut r = _mm_undefined_si128();
57223 _mm_storeu_epi32(&mut r as *mut _ as *mut i32, a);
57224 assert_eq_m128i(r, a);
57225 }
57226
57227 #[simd_test(enable = "avx512f")]
57228 unsafe fn test_mm512_loadu_si512() {
57229 let a = &[4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50];
57230 let p = a.as_ptr();
57231 let r = _mm512_loadu_si512(black_box(p));
57232 let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
57233 assert_eq_m512i(r, e);
57234 }
57235
57236 #[simd_test(enable = "avx512f")]
57237 unsafe fn test_mm512_storeu_si512() {
57238 let a = _mm512_set1_epi32(9);
57239 let mut r = _mm512_undefined_epi32();
57240 _mm512_storeu_si512(&mut r as *mut _, a);
57241 assert_eq_m512i(r, a);
57242 }
57243
57244 #[simd_test(enable = "avx512f")]
57245 unsafe fn test_mm512_load_si512() {
57246 #[repr(align(64))]
57247 struct Align {
57248 data: [i32; 16], // 64 bytes
57249 }
57250 let a = Align {
57251 data: [4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50],
57252 };
57253 let p = (a.data).as_ptr();
57254 let r = _mm512_load_si512(black_box(p));
57255 let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
57256 assert_eq_m512i(r, e);
57257 }
57258
57259 #[simd_test(enable = "avx512f")]
57260 unsafe fn test_mm512_store_si512() {
57261 let a = _mm512_set1_epi32(9);
57262 let mut r = _mm512_undefined_epi32();
57263 _mm512_store_si512(&mut r as *mut _, a);
57264 assert_eq_m512i(r, a);
57265 }
57266
57267 #[simd_test(enable = "avx512f")]
57268 unsafe fn test_mm512_load_epi32() {
57269 #[repr(align(64))]
57270 struct Align {
57271 data: [i32; 16], // 64 bytes
57272 }
57273 let a = Align {
57274 data: [4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50],
57275 };
57276 let p = (a.data).as_ptr();
57277 let r = _mm512_load_epi32(black_box(p));
57278 let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
57279 assert_eq_m512i(r, e);
57280 }
57281
57282 #[simd_test(enable = "avx512f,avx512vl")]
57283 unsafe fn test_mm256_load_epi32() {
57284 #[repr(align(64))]
57285 struct Align {
57286 data: [i32; 8],
57287 }
57288 let a = Align {
57289 data: [4, 3, 2, 5, 8, 9, 64, 50],
57290 };
57291 let p = (a.data).as_ptr();
57292 let r = _mm256_load_epi32(black_box(p));
57293 let e = _mm256_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50);
57294 assert_eq_m256i(r, e);
57295 }
57296
57297 #[simd_test(enable = "avx512f,avx512vl")]
57298 unsafe fn test_mm_load_epi32() {
57299 #[repr(align(64))]
57300 struct Align {
57301 data: [i32; 4],
57302 }
57303 let a = Align { data: [4, 3, 2, 5] };
57304 let p = (a.data).as_ptr();
57305 let r = _mm_load_epi32(black_box(p));
57306 let e = _mm_setr_epi32(4, 3, 2, 5);
57307 assert_eq_m128i(r, e);
57308 }
57309
57310 #[simd_test(enable = "avx512f")]
57311 unsafe fn test_mm512_store_epi32() {
57312 let a = _mm512_set1_epi32(9);
57313 let mut r = _mm512_undefined_epi32();
57314 _mm512_store_epi32(&mut r as *mut _ as *mut i32, a);
57315 assert_eq_m512i(r, a);
57316 }
57317
57318 #[simd_test(enable = "avx512f,avx512vl")]
57319 unsafe fn test_mm256_store_epi32() {
57320 let a = _mm256_set1_epi32(9);
57321 let mut r = _mm256_undefined_si256();
57322 _mm256_store_epi32(&mut r as *mut _ as *mut i32, a);
57323 assert_eq_m256i(r, a);
57324 }
57325
57326 #[simd_test(enable = "avx512f,avx512vl")]
57327 unsafe fn test_mm_store_epi32() {
57328 let a = _mm_set1_epi32(9);
57329 let mut r = _mm_undefined_si128();
57330 _mm_store_epi32(&mut r as *mut _ as *mut i32, a);
57331 assert_eq_m128i(r, a);
57332 }
57333
57334 #[simd_test(enable = "avx512f")]
57335 unsafe fn test_mm512_load_ps() {
57336 #[repr(align(64))]
57337 struct Align {
57338 data: [f32; 16], // 64 bytes
57339 }
57340 let a = Align {
57341 data: [
57342 4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
57343 ],
57344 };
57345 let p = (a.data).as_ptr();
57346 let r = _mm512_load_ps(black_box(p));
57347 let e = _mm512_setr_ps(
57348 4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
57349 );
57350 assert_eq_m512(r, e);
57351 }
57352
57353 #[simd_test(enable = "avx512f")]
57354 unsafe fn test_mm512_store_ps() {
57355 let a = _mm512_set1_ps(9.);
57356 let mut r = _mm512_undefined_ps();
57357 _mm512_store_ps(&mut r as *mut _ as *mut f32, a);
57358 assert_eq_m512(r, a);
57359 }
57360
57361 #[simd_test(enable = "avx512f")]
57362 unsafe fn test_mm512_mask_set1_epi32() {
57363 let src = _mm512_set1_epi32(2);
57364 let a: i32 = 11;
57365 let r = _mm512_mask_set1_epi32(src, 0, a);
57366 assert_eq_m512i(r, src);
57367 let r = _mm512_mask_set1_epi32(src, 0b11111111_11111111, a);
57368 let e = _mm512_set1_epi32(11);
57369 assert_eq_m512i(r, e);
57370 }
57371
57372 #[simd_test(enable = "avx512f")]
57373 unsafe fn test_mm512_maskz_set1_epi32() {
57374 let a: i32 = 11;
57375 let r = _mm512_maskz_set1_epi32(0, a);
57376 assert_eq_m512i(r, _mm512_setzero_si512());
57377 let r = _mm512_maskz_set1_epi32(0b11111111_11111111, a);
57378 let e = _mm512_set1_epi32(11);
57379 assert_eq_m512i(r, e);
57380 }
57381
57382 #[simd_test(enable = "avx512f,avx512vl")]
57383 unsafe fn test_mm256_mask_set1_epi32() {
57384 let src = _mm256_set1_epi32(2);
57385 let a: i32 = 11;
57386 let r = _mm256_mask_set1_epi32(src, 0, a);
57387 assert_eq_m256i(r, src);
57388 let r = _mm256_mask_set1_epi32(src, 0b11111111, a);
57389 let e = _mm256_set1_epi32(11);
57390 assert_eq_m256i(r, e);
57391 }
57392
57393 #[simd_test(enable = "avx512f")]
57394 unsafe fn test_mm256_maskz_set1_epi32() {
57395 let a: i32 = 11;
57396 let r = _mm256_maskz_set1_epi32(0, a);
57397 assert_eq_m256i(r, _mm256_setzero_si256());
57398 let r = _mm256_maskz_set1_epi32(0b11111111, a);
57399 let e = _mm256_set1_epi32(11);
57400 assert_eq_m256i(r, e);
57401 }
57402
57403 #[simd_test(enable = "avx512f,avx512vl")]
57404 unsafe fn test_mm_mask_set1_epi32() {
57405 let src = _mm_set1_epi32(2);
57406 let a: i32 = 11;
57407 let r = _mm_mask_set1_epi32(src, 0, a);
57408 assert_eq_m128i(r, src);
57409 let r = _mm_mask_set1_epi32(src, 0b00001111, a);
57410 let e = _mm_set1_epi32(11);
57411 assert_eq_m128i(r, e);
57412 }
57413
57414 #[simd_test(enable = "avx512f")]
57415 unsafe fn test_mm_maskz_set1_epi32() {
57416 let a: i32 = 11;
57417 let r = _mm_maskz_set1_epi32(0, a);
57418 assert_eq_m128i(r, _mm_setzero_si128());
57419 let r = _mm_maskz_set1_epi32(0b00001111, a);
57420 let e = _mm_set1_epi32(11);
57421 assert_eq_m128i(r, e);
57422 }
57423
57424 #[simd_test(enable = "avx512f")]
57425 unsafe fn test_mm_mask_move_ss() {
57426 let src = _mm_set_ps(10., 11., 100., 110.);
57427 let a = _mm_set_ps(1., 2., 10., 20.);
57428 let b = _mm_set_ps(3., 4., 30., 40.);
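// low lane comes from src when the mask bit is clear and from b when it is set; upper lanes always come from a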
57429 let r = _mm_mask_move_ss(src, 0, a, b);
57430 let e = _mm_set_ps(1., 2., 10., 110.);
57431 assert_eq_m128(r, e);
57432 let r = _mm_mask_move_ss(src, 0b11111111, a, b);
57433 let e = _mm_set_ps(1., 2., 10., 40.);
57434 assert_eq_m128(r, e);
57435 }
57436
57437 #[simd_test(enable = "avx512f")]
57438 unsafe fn test_mm_maskz_move_ss() {
57439 let a = _mm_set_ps(1., 2., 10., 20.);
57440 let b = _mm_set_ps(3., 4., 30., 40.);
57441 let r = _mm_maskz_move_ss(0, a, b);
57442 let e = _mm_set_ps(1., 2., 10., 0.);
57443 assert_eq_m128(r, e);
57444 let r = _mm_maskz_move_ss(0b11111111, a, b);
57445 let e = _mm_set_ps(1., 2., 10., 40.);
57446 assert_eq_m128(r, e);
57447 }
57448
57449 #[simd_test(enable = "avx512f")]
57450 unsafe fn test_mm_mask_move_sd() {
57451 let src = _mm_set_pd(10., 11.);
57452 let a = _mm_set_pd(1., 2.);
57453 let b = _mm_set_pd(3., 4.);
57454 let r = _mm_mask_move_sd(src, 0, a, b);
57455 let e = _mm_set_pd(1., 11.);
57456 assert_eq_m128d(r, e);
57457 let r = _mm_mask_move_sd(src, 0b11111111, a, b);
57458 let e = _mm_set_pd(1., 4.);
57459 assert_eq_m128d(r, e);
57460 }
57461
57462 #[simd_test(enable = "avx512f")]
57463 unsafe fn test_mm_maskz_move_sd() {
57464 let a = _mm_set_pd(1., 2.);
57465 let b = _mm_set_pd(3., 4.);
57466 let r = _mm_maskz_move_sd(0, a, b);
57467 let e = _mm_set_pd(1., 0.);
57468 assert_eq_m128d(r, e);
57469 let r = _mm_maskz_move_sd(0b11111111, a, b);
57470 let e = _mm_set_pd(1., 4.);
57471 assert_eq_m128d(r, e);
57472 }
57473
57474 #[simd_test(enable = "avx512f")]
57475 unsafe fn test_mm_mask_add_ss() {
57476 let src = _mm_set_ps(10., 11., 100., 110.);
57477 let a = _mm_set_ps(1., 2., 10., 20.);
57478 let b = _mm_set_ps(3., 4., 30., 40.);
57479 let r = _mm_mask_add_ss(src, 0, a, b);
57480 let e = _mm_set_ps(1., 2., 10., 110.);
57481 assert_eq_m128(r, e);
57482 let r = _mm_mask_add_ss(src, 0b11111111, a, b);
57483 let e = _mm_set_ps(1., 2., 10., 60.);
57484 assert_eq_m128(r, e);
57485 }
57486
57487 #[simd_test(enable = "avx512f")]
57488 unsafe fn test_mm_maskz_add_ss() {
57489 let a = _mm_set_ps(1., 2., 10., 20.);
57490 let b = _mm_set_ps(3., 4., 30., 40.);
57491 let r = _mm_maskz_add_ss(0, a, b);
57492 let e = _mm_set_ps(1., 2., 10., 0.);
57493 assert_eq_m128(r, e);
57494 let r = _mm_maskz_add_ss(0b11111111, a, b);
57495 let e = _mm_set_ps(1., 2., 10., 60.);
57496 assert_eq_m128(r, e);
57497 }
57498
57499 #[simd_test(enable = "avx512f")]
57500 unsafe fn test_mm_mask_add_sd() {
57501 let src = _mm_set_pd(10., 11.);
57502 let a = _mm_set_pd(1., 2.);
57503 let b = _mm_set_pd(3., 4.);
57504 let r = _mm_mask_add_sd(src, 0, a, b);
57505 let e = _mm_set_pd(1., 11.);
57506 assert_eq_m128d(r, e);
57507 let r = _mm_mask_add_sd(src, 0b11111111, a, b);
57508 let e = _mm_set_pd(1., 6.);
57509 assert_eq_m128d(r, e);
57510 }
57511
57512 #[simd_test(enable = "avx512f")]
57513 unsafe fn test_mm_maskz_add_sd() {
57514 let a = _mm_set_pd(1., 2.);
57515 let b = _mm_set_pd(3., 4.);
57516 let r = _mm_maskz_add_sd(0, a, b);
57517 let e = _mm_set_pd(1., 0.);
57518 assert_eq_m128d(r, e);
57519 let r = _mm_maskz_add_sd(0b11111111, a, b);
57520 let e = _mm_set_pd(1., 6.);
57521 assert_eq_m128d(r, e);
57522 }
57523
57524 #[simd_test(enable = "avx512f")]
57525 unsafe fn test_mm_mask_sub_ss() {
57526 let src = _mm_set_ps(10., 11., 100., 110.);
57527 let a = _mm_set_ps(1., 2., 10., 20.);
57528 let b = _mm_set_ps(3., 4., 30., 40.);
57529 let r = _mm_mask_sub_ss(src, 0, a, b);
57530 let e = _mm_set_ps(1., 2., 10., 110.);
57531 assert_eq_m128(r, e);
57532 let r = _mm_mask_sub_ss(src, 0b11111111, a, b);
57533 let e = _mm_set_ps(1., 2., 10., -20.);
57534 assert_eq_m128(r, e);
57535 }
57536
57537 #[simd_test(enable = "avx512f")]
57538 unsafe fn test_mm_maskz_sub_ss() {
57539 let a = _mm_set_ps(1., 2., 10., 20.);
57540 let b = _mm_set_ps(3., 4., 30., 40.);
57541 let r = _mm_maskz_sub_ss(0, a, b);
57542 let e = _mm_set_ps(1., 2., 10., 0.);
57543 assert_eq_m128(r, e);
57544 let r = _mm_maskz_sub_ss(0b11111111, a, b);
57545 let e = _mm_set_ps(1., 2., 10., -20.);
57546 assert_eq_m128(r, e);
57547 }
57548
57549 #[simd_test(enable = "avx512f")]
57550 unsafe fn test_mm_mask_sub_sd() {
57551 let src = _mm_set_pd(10., 11.);
57552 let a = _mm_set_pd(1., 2.);
57553 let b = _mm_set_pd(3., 4.);
57554 let r = _mm_mask_sub_sd(src, 0, a, b);
57555 let e = _mm_set_pd(1., 11.);
57556 assert_eq_m128d(r, e);
57557 let r = _mm_mask_sub_sd(src, 0b11111111, a, b);
57558 let e = _mm_set_pd(1., -2.);
57559 assert_eq_m128d(r, e);
57560 }
57561
57562 #[simd_test(enable = "avx512f")]
57563 unsafe fn test_mm_maskz_sub_sd() {
57564 let a = _mm_set_pd(1., 2.);
57565 let b = _mm_set_pd(3., 4.);
57566 let r = _mm_maskz_sub_sd(0, a, b);
57567 let e = _mm_set_pd(1., 0.);
57568 assert_eq_m128d(r, e);
57569 let r = _mm_maskz_sub_sd(0b11111111, a, b);
57570 let e = _mm_set_pd(1., -2.);
57571 assert_eq_m128d(r, e);
57572 }
57573
57574 #[simd_test(enable = "avx512f")]
57575 unsafe fn test_mm_mask_mul_ss() {
57576 let src = _mm_set_ps(10., 11., 100., 110.);
57577 let a = _mm_set_ps(1., 2., 10., 20.);
57578 let b = _mm_set_ps(3., 4., 30., 40.);
57579 let r = _mm_mask_mul_ss(src, 0, a, b);
57580 let e = _mm_set_ps(1., 2., 10., 110.);
57581 assert_eq_m128(r, e);
57582 let r = _mm_mask_mul_ss(src, 0b11111111, a, b);
57583 let e = _mm_set_ps(1., 2., 10., 800.);
57584 assert_eq_m128(r, e);
57585 }
57586
57587 #[simd_test(enable = "avx512f")]
57588 unsafe fn test_mm_maskz_mul_ss() {
57589 let a = _mm_set_ps(1., 2., 10., 20.);
57590 let b = _mm_set_ps(3., 4., 30., 40.);
57591 let r = _mm_maskz_mul_ss(0, a, b);
57592 let e = _mm_set_ps(1., 2., 10., 0.);
57593 assert_eq_m128(r, e);
57594 let r = _mm_maskz_mul_ss(0b11111111, a, b);
57595 let e = _mm_set_ps(1., 2., 10., 800.);
57596 assert_eq_m128(r, e);
57597 }
57598
57599 #[simd_test(enable = "avx512f")]
57600 unsafe fn test_mm_mask_mul_sd() {
57601 let src = _mm_set_pd(10., 11.);
57602 let a = _mm_set_pd(1., 2.);
57603 let b = _mm_set_pd(3., 4.);
57604 let r = _mm_mask_mul_sd(src, 0, a, b);
57605 let e = _mm_set_pd(1., 11.);
57606 assert_eq_m128d(r, e);
57607 let r = _mm_mask_mul_sd(src, 0b11111111, a, b);
57608 let e = _mm_set_pd(1., 8.);
57609 assert_eq_m128d(r, e);
57610 }
57611
57612 #[simd_test(enable = "avx512f")]
57613 unsafe fn test_mm_maskz_mul_sd() {
57614 let a = _mm_set_pd(1., 2.);
57615 let b = _mm_set_pd(3., 4.);
57616 let r = _mm_maskz_mul_sd(0, a, b);
57617 let e = _mm_set_pd(1., 0.);
57618 assert_eq_m128d(r, e);
57619 let r = _mm_maskz_mul_sd(0b11111111, a, b);
57620 let e = _mm_set_pd(1., 8.);
57621 assert_eq_m128d(r, e);
57622 }
57623
57624 #[simd_test(enable = "avx512f")]
57625 unsafe fn test_mm_mask_div_ss() {
57626 let src = _mm_set_ps(10., 11., 100., 110.);
57627 let a = _mm_set_ps(1., 2., 10., 20.);
57628 let b = _mm_set_ps(3., 4., 30., 40.);
57629 let r = _mm_mask_div_ss(src, 0, a, b);
57630 let e = _mm_set_ps(1., 2., 10., 110.);
57631 assert_eq_m128(r, e);
57632 let r = _mm_mask_div_ss(src, 0b11111111, a, b);
57633 let e = _mm_set_ps(1., 2., 10., 0.5);
57634 assert_eq_m128(r, e);
57635 }
57636
57637 #[simd_test(enable = "avx512f")]
57638 unsafe fn test_mm_maskz_div_ss() {
57639 let a = _mm_set_ps(1., 2., 10., 20.);
57640 let b = _mm_set_ps(3., 4., 30., 40.);
57641 let r = _mm_maskz_div_ss(0, a, b);
57642 let e = _mm_set_ps(1., 2., 10., 0.);
57643 assert_eq_m128(r, e);
57644 let r = _mm_maskz_div_ss(0b11111111, a, b);
57645 let e = _mm_set_ps(1., 2., 10., 0.5);
57646 assert_eq_m128(r, e);
57647 }
57648
57649 #[simd_test(enable = "avx512f")]
57650 unsafe fn test_mm_mask_div_sd() {
57651 let src = _mm_set_pd(10., 11.);
57652 let a = _mm_set_pd(1., 2.);
57653 let b = _mm_set_pd(3., 4.);
57654 let r = _mm_mask_div_sd(src, 0, a, b);
57655 let e = _mm_set_pd(1., 11.);
57656 assert_eq_m128d(r, e);
57657 let r = _mm_mask_div_sd(src, 0b11111111, a, b);
57658 let e = _mm_set_pd(1., 0.5);
57659 assert_eq_m128d(r, e);
57660 }
57661
57662 #[simd_test(enable = "avx512f")]
57663 unsafe fn test_mm_maskz_div_sd() {
57664 let a = _mm_set_pd(1., 2.);
57665 let b = _mm_set_pd(3., 4.);
57666 let r = _mm_maskz_div_sd(0, a, b);
57667 let e = _mm_set_pd(1., 0.);
57668 assert_eq_m128d(r, e);
57669 let r = _mm_maskz_div_sd(0b11111111, a, b);
57670 let e = _mm_set_pd(1., 0.5);
57671 assert_eq_m128d(r, e);
57672 }
57673
57674 #[simd_test(enable = "avx512f")]
57675 unsafe fn test_mm_mask_max_ss() {
57676 let a = _mm_set_ps(0., 1., 2., 3.);
57677 let b = _mm_set_ps(4., 5., 6., 7.);
57678 let r = _mm_mask_max_ss(a, 0, a, b);
57679 let e = _mm_set_ps(0., 1., 2., 3.);
57680 assert_eq_m128(r, e);
57681 let r = _mm_mask_max_ss(a, 0b11111111, a, b);
57682 let e = _mm_set_ps(0., 1., 2., 7.);
57683 assert_eq_m128(r, e);
57684 }
57685
57686 #[simd_test(enable = "avx512f")]
57687 unsafe fn test_mm_maskz_max_ss() {
57688 let a = _mm_set_ps(0., 1., 2., 3.);
57689 let b = _mm_set_ps(4., 5., 6., 7.);
57690 let r = _mm_maskz_max_ss(0, a, b);
57691 let e = _mm_set_ps(0., 1., 2., 0.);
57692 assert_eq_m128(r, e);
57693 let r = _mm_maskz_max_ss(0b11111111, a, b);
57694 let e = _mm_set_ps(0., 1., 2., 7.);
57695 assert_eq_m128(r, e);
57696 }
57697
57698 #[simd_test(enable = "avx512f")]
57699 unsafe fn test_mm_mask_max_sd() {
57700 let a = _mm_set_pd(0., 1.);
57701 let b = _mm_set_pd(2., 3.);
57702 let r = _mm_mask_max_sd(a, 0, a, b);
57703 let e = _mm_set_pd(0., 1.);
57704 assert_eq_m128d(r, e);
57705 let r = _mm_mask_max_sd(a, 0b11111111, a, b);
57706 let e = _mm_set_pd(0., 3.);
57707 assert_eq_m128d(r, e);
57708 }
57709
57710 #[simd_test(enable = "avx512f")]
57711 unsafe fn test_mm_maskz_max_sd() {
57712 let a = _mm_set_pd(0., 1.);
57713 let b = _mm_set_pd(2., 3.);
57714 let r = _mm_maskz_max_sd(0, a, b);
57715 let e = _mm_set_pd(0., 0.);
57716 assert_eq_m128d(r, e);
57717 let r = _mm_maskz_max_sd(0b11111111, a, b);
57718 let e = _mm_set_pd(0., 3.);
57719 assert_eq_m128d(r, e);
57720 }
57721
57722 #[simd_test(enable = "avx512f")]
57723 unsafe fn test_mm_mask_min_ss() {
57724 let a = _mm_set_ps(0., 1., 2., 3.);
57725 let b = _mm_set_ps(4., 5., 6., 7.);
57726 let r = _mm_mask_min_ss(a, 0, a, b);
57727 let e = _mm_set_ps(0., 1., 2., 3.);
57728 assert_eq_m128(r, e);
57729 let r = _mm_mask_min_ss(a, 0b11111111, a, b);
57730 let e = _mm_set_ps(0., 1., 2., 3.);
57731 assert_eq_m128(r, e);
57732 }
57733
57734 #[simd_test(enable = "avx512f")]
57735 unsafe fn test_mm_maskz_min_ss() {
57736 let a = _mm_set_ps(0., 1., 2., 3.);
57737 let b = _mm_set_ps(4., 5., 6., 7.);
57738 let r = _mm_maskz_min_ss(0, a, b);
57739 let e = _mm_set_ps(0., 1., 2., 0.);
57740 assert_eq_m128(r, e);
57741 let r = _mm_maskz_min_ss(0b11111111, a, b);
57742 let e = _mm_set_ps(0., 1., 2., 3.);
57743 assert_eq_m128(r, e);
57744 }
57745
57746 #[simd_test(enable = "avx512f")]
57747 unsafe fn test_mm_mask_min_sd() {
57748 let a = _mm_set_pd(0., 1.);
57749 let b = _mm_set_pd(2., 3.);
57750 let r = _mm_mask_min_sd(a, 0, a, b);
57751 let e = _mm_set_pd(0., 1.);
57752 assert_eq_m128d(r, e);
57753 let r = _mm_mask_min_sd(a, 0b11111111, a, b);
57754 let e = _mm_set_pd(0., 1.);
57755 assert_eq_m128d(r, e);
57756 }
57757
57758 #[simd_test(enable = "avx512f")]
57759 unsafe fn test_mm_maskz_min_sd() {
57760 let a = _mm_set_pd(0., 1.);
57761 let b = _mm_set_pd(2., 3.);
57762 let r = _mm_maskz_min_sd(0, a, b);
57763 let e = _mm_set_pd(0., 0.);
57764 assert_eq_m128d(r, e);
57765 let r = _mm_maskz_min_sd(0b11111111, a, b);
57766 let e = _mm_set_pd(0., 1.);
57767 assert_eq_m128d(r, e);
57768 }
57769
57770 #[simd_test(enable = "avx512f")]
57771 unsafe fn test_mm_mask_sqrt_ss() {
57772 let src = _mm_set_ps(10., 11., 100., 110.);
57773 let a = _mm_set_ps(1., 2., 10., 20.);
57774 let b = _mm_set_ps(3., 4., 30., 4.);
57775 let r = _mm_mask_sqrt_ss(src, 0, a, b);
57776 let e = _mm_set_ps(1., 2., 10., 110.);
57777 assert_eq_m128(r, e);
57778 let r = _mm_mask_sqrt_ss(src, 0b11111111, a, b);
57779 let e = _mm_set_ps(1., 2., 10., 2.);
57780 assert_eq_m128(r, e);
57781 }
57782
57783 #[simd_test(enable = "avx512f")]
57784 unsafe fn test_mm_maskz_sqrt_ss() {
57785 let a = _mm_set_ps(1., 2., 10., 20.);
57786 let b = _mm_set_ps(3., 4., 30., 4.);
57787 let r = _mm_maskz_sqrt_ss(0, a, b);
57788 let e = _mm_set_ps(1., 2., 10., 0.);
57789 assert_eq_m128(r, e);
57790 let r = _mm_maskz_sqrt_ss(0b11111111, a, b);
57791 let e = _mm_set_ps(1., 2., 10., 2.);
57792 assert_eq_m128(r, e);
57793 }
57794
57795 #[simd_test(enable = "avx512f")]
57796 unsafe fn test_mm_mask_sqrt_sd() {
57797 let src = _mm_set_pd(10., 11.);
57798 let a = _mm_set_pd(1., 2.);
57799 let b = _mm_set_pd(3., 4.);
57800 let r = _mm_mask_sqrt_sd(src, 0, a, b);
57801 let e = _mm_set_pd(1., 11.);
57802 assert_eq_m128d(r, e);
57803 let r = _mm_mask_sqrt_sd(src, 0b11111111, a, b);
57804 let e = _mm_set_pd(1., 2.);
57805 assert_eq_m128d(r, e);
57806 }
57807
57808 #[simd_test(enable = "avx512f")]
57809 unsafe fn test_mm_maskz_sqrt_sd() {
57810 let a = _mm_set_pd(1., 2.);
57811 let b = _mm_set_pd(3., 4.);
57812 let r = _mm_maskz_sqrt_sd(0, a, b);
57813 let e = _mm_set_pd(1., 0.);
57814 assert_eq_m128d(r, e);
57815 let r = _mm_maskz_sqrt_sd(0b11111111, a, b);
57816 let e = _mm_set_pd(1., 2.);
57817 assert_eq_m128d(r, e);
57818 }
57819
57820 #[simd_test(enable = "avx512f")]
57821 unsafe fn test_mm_rsqrt14_ss() {
57822 let a = _mm_set_ps(1., 2., 10., 20.);
57823 let b = _mm_set_ps(3., 4., 30., 4.);
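// approximate reciprocal square root of b's low lane (max relative error 2^-14): 1/sqrt(4.) = 0.5 exactly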
57824 let r = _mm_rsqrt14_ss(a, b);
57825 let e = _mm_set_ps(1., 2., 10., 0.5);
57826 assert_eq_m128(r, e);
57827 }
57828
57829 #[simd_test(enable = "avx512f")]
57830 unsafe fn test_mm_mask_rsqrt14_ss() {
57831 let src = _mm_set_ps(10., 11., 100., 110.);
57832 let a = _mm_set_ps(1., 2., 10., 20.);
57833 let b = _mm_set_ps(3., 4., 30., 4.);
57834 let r = _mm_mask_rsqrt14_ss(src, 0, a, b);
57835 let e = _mm_set_ps(1., 2., 10., 110.);
57836 assert_eq_m128(r, e);
57837 let r = _mm_mask_rsqrt14_ss(src, 0b11111111, a, b);
57838 let e = _mm_set_ps(1., 2., 10., 0.5);
57839 assert_eq_m128(r, e);
57840 }
57841
57842 #[simd_test(enable = "avx512f")]
57843 unsafe fn test_mm_maskz_rsqrt14_ss() {
57844 let a = _mm_set_ps(1., 2., 10., 20.);
57845 let b = _mm_set_ps(3., 4., 30., 4.);
57846 let r = _mm_maskz_rsqrt14_ss(0, a, b);
57847 let e = _mm_set_ps(1., 2., 10., 0.);
57848 assert_eq_m128(r, e);
57849 let r = _mm_maskz_rsqrt14_ss(0b11111111, a, b);
57850 let e = _mm_set_ps(1., 2., 10., 0.5);
57851 assert_eq_m128(r, e);
57852 }
57853
57854 #[simd_test(enable = "avx512f")]
57855 unsafe fn test_mm_rsqrt14_sd() {
57856 let a = _mm_set_pd(1., 2.);
57857 let b = _mm_set_pd(3., 4.);
57858 let r = _mm_rsqrt14_sd(a, b);
57859 let e = _mm_set_pd(1., 0.5);
57860 assert_eq_m128d(r, e);
57861 }
57862
57863 #[simd_test(enable = "avx512f")]
57864 unsafe fn test_mm_mask_rsqrt14_sd() {
57865 let src = _mm_set_pd(10., 11.);
57866 let a = _mm_set_pd(1., 2.);
57867 let b = _mm_set_pd(3., 4.);
57868 let r = _mm_mask_rsqrt14_sd(src, 0, a, b);
57869 let e = _mm_set_pd(1., 11.);
57870 assert_eq_m128d(r, e);
57871 let r = _mm_mask_rsqrt14_sd(src, 0b11111111, a, b);
57872 let e = _mm_set_pd(1., 0.5);
57873 assert_eq_m128d(r, e);
57874 }
57875
57876 #[simd_test(enable = "avx512f")]
57877 unsafe fn test_mm_maskz_rsqrt14_sd() {
57878 let a = _mm_set_pd(1., 2.);
57879 let b = _mm_set_pd(3., 4.);
57880 let r = _mm_maskz_rsqrt14_sd(0, a, b);
57881 let e = _mm_set_pd(1., 0.);
57882 assert_eq_m128d(r, e);
57883 let r = _mm_maskz_rsqrt14_sd(0b11111111, a, b);
57884 let e = _mm_set_pd(1., 0.5);
57885 assert_eq_m128d(r, e);
57886 }
57887
57888 #[simd_test(enable = "avx512f")]
57889 unsafe fn test_mm_rcp14_ss() {
57890 let a = _mm_set_ps(1., 2., 10., 20.);
57891 let b = _mm_set_ps(3., 4., 30., 4.);
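// approximate reciprocal of b's low lane (max relative error 2^-14): 1/4. = 0.25 exactly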
57892 let r = _mm_rcp14_ss(a, b);
57893 let e = _mm_set_ps(1., 2., 10., 0.25);
57894 assert_eq_m128(r, e);
57895 }
57896
57897 #[simd_test(enable = "avx512f")]
57898 unsafe fn test_mm_mask_rcp14_ss() {
57899 let src = _mm_set_ps(10., 11., 100., 110.);
57900 let a = _mm_set_ps(1., 2., 10., 20.);
57901 let b = _mm_set_ps(3., 4., 30., 4.);
57902 let r = _mm_mask_rcp14_ss(src, 0, a, b);
57903 let e = _mm_set_ps(1., 2., 10., 110.);
57904 assert_eq_m128(r, e);
57905 let r = _mm_mask_rcp14_ss(src, 0b11111111, a, b);
57906 let e = _mm_set_ps(1., 2., 10., 0.25);
57907 assert_eq_m128(r, e);
57908 }
57909
57910 #[simd_test(enable = "avx512f")]
57911 unsafe fn test_mm_maskz_rcp14_ss() {
57912 let a = _mm_set_ps(1., 2., 10., 20.);
57913 let b = _mm_set_ps(3., 4., 30., 4.);
57914 let r = _mm_maskz_rcp14_ss(0, a, b);
57915 let e = _mm_set_ps(1., 2., 10., 0.);
57916 assert_eq_m128(r, e);
57917 let r = _mm_maskz_rcp14_ss(0b11111111, a, b);
57918 let e = _mm_set_ps(1., 2., 10., 0.25);
57919 assert_eq_m128(r, e);
57920 }
57921
57922 #[simd_test(enable = "avx512f")]
57923 unsafe fn test_mm_rcp14_sd() {
57924 let a = _mm_set_pd(1., 2.);
57925 let b = _mm_set_pd(3., 4.);
57926 let r = _mm_rcp14_sd(a, b);
57927 let e = _mm_set_pd(1., 0.25);
57928 assert_eq_m128d(r, e);
57929 }
57930
57931 #[simd_test(enable = "avx512f")]
57932 unsafe fn test_mm_mask_rcp14_sd() {
57933 let src = _mm_set_pd(10., 11.);
57934 let a = _mm_set_pd(1., 2.);
57935 let b = _mm_set_pd(3., 4.);
57936 let r = _mm_mask_rcp14_sd(src, 0, a, b);
57937 let e = _mm_set_pd(1., 11.);
57938 assert_eq_m128d(r, e);
57939 let r = _mm_mask_rcp14_sd(src, 0b11111111, a, b);
57940 let e = _mm_set_pd(1., 0.25);
57941 assert_eq_m128d(r, e);
57942 }
57943
57944 #[simd_test(enable = "avx512f")]
57945 unsafe fn test_mm_maskz_rcp14_sd() {
57946 let a = _mm_set_pd(1., 2.);
57947 let b = _mm_set_pd(3., 4.);
57948 let r = _mm_maskz_rcp14_sd(0, a, b);
57949 let e = _mm_set_pd(1., 0.);
57950 assert_eq_m128d(r, e);
57951 let r = _mm_maskz_rcp14_sd(0b11111111, a, b);
57952 let e = _mm_set_pd(1., 0.25);
57953 assert_eq_m128d(r, e);
57954 }
57955
57956 #[simd_test(enable = "avx512f")]
57957 unsafe fn test_mm_getexp_ss() {
57958 let a = _mm_set1_ps(2.);
57959 let b = _mm_set1_ps(3.);
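// getexp returns floor(log2(|b[0]|)) in the low lane: floor(log2(3.)) = 1.; upper lanes come from a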
57960 let r = _mm_getexp_ss(a, b);
57961 let e = _mm_set_ps(2., 2., 2., 1.);
57962 assert_eq_m128(r, e);
57963 }
57964
57965 #[simd_test(enable = "avx512f")]
57966 unsafe fn test_mm_mask_getexp_ss() {
57967 let a = _mm_set1_ps(2.);
57968 let b = _mm_set1_ps(3.);
57969 let r = _mm_mask_getexp_ss(a, 0, a, b);
57970 let e = _mm_set_ps(2., 2., 2., 2.);
57971 assert_eq_m128(r, e);
57972 let r = _mm_mask_getexp_ss(a, 0b11111111, a, b);
57973 let e = _mm_set_ps(2., 2., 2., 1.);
57974 assert_eq_m128(r, e);
57975 }
57976
57977 #[simd_test(enable = "avx512f")]
57978 unsafe fn test_mm_maskz_getexp_ss() {
57979 let a = _mm_set1_ps(2.);
57980 let b = _mm_set1_ps(3.);
57981 let r = _mm_maskz_getexp_ss(0, a, b);
57982 let e = _mm_set_ps(2., 2., 2., 0.);
57983 assert_eq_m128(r, e);
57984 let r = _mm_maskz_getexp_ss(0b11111111, a, b);
57985 let e = _mm_set_ps(2., 2., 2., 1.);
57986 assert_eq_m128(r, e);
57987 }
57988
57989 #[simd_test(enable = "avx512f")]
57990 unsafe fn test_mm_getexp_sd() {
57991 let a = _mm_set1_pd(2.);
57992 let b = _mm_set1_pd(3.);
57993 let r = _mm_getexp_sd(a, b);
57994 let e = _mm_set_pd(2., 1.);
57995 assert_eq_m128d(r, e);
57996 }
57997
57998 #[simd_test(enable = "avx512f")]
57999 unsafe fn test_mm_mask_getexp_sd() {
58000 let a = _mm_set1_pd(2.);
58001 let b = _mm_set1_pd(3.);
58002 let r = _mm_mask_getexp_sd(a, 0, a, b);
58003 let e = _mm_set_pd(2., 2.);
58004 assert_eq_m128d(r, e);
58005 let r = _mm_mask_getexp_sd(a, 0b11111111, a, b);
58006 let e = _mm_set_pd(2., 1.);
58007 assert_eq_m128d(r, e);
58008 }
58009
58010 #[simd_test(enable = "avx512f")]
58011 unsafe fn test_mm_maskz_getexp_sd() {
58012 let a = _mm_set1_pd(2.);
58013 let b = _mm_set1_pd(3.);
58014 let r = _mm_maskz_getexp_sd(0, a, b);
58015 let e = _mm_set_pd(2., 0.);
58016 assert_eq_m128d(r, e);
58017 let r = _mm_maskz_getexp_sd(0b11111111, a, b);
58018 let e = _mm_set_pd(2., 1.);
58019 assert_eq_m128d(r, e);
58020 }
58021
58022 #[simd_test(enable = "avx512f")]
58023 unsafe fn test_mm_getmant_ss() {
58024 let a = _mm_set1_ps(20.);
58025 let b = _mm_set1_ps(10.);
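// with _MM_MANT_NORM_1_2 the mantissa is normalized to [1, 2): 10. = 1.25 * 2^3, so the low lane is 1.25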
58026 let r = _mm_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, b);
58027 let e = _mm_set_ps(20., 20., 20., 1.25);
58028 assert_eq_m128(r, e);
58029 }
58030
58031 #[simd_test(enable = "avx512f")]
58032 unsafe fn test_mm_mask_getmant_ss() {
58033 let a = _mm_set1_ps(20.);
58034 let b = _mm_set1_ps(10.);
58035 let r = _mm_mask_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a, b);
58036 let e = _mm_set_ps(20., 20., 20., 20.);
58037 assert_eq_m128(r, e);
58038 let r = _mm_mask_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b11111111, a, b);
58039 let e = _mm_set_ps(20., 20., 20., 1.25);
58040 assert_eq_m128(r, e);
58041 }
58042
58043 #[simd_test(enable = "avx512f")]
58044 unsafe fn test_mm_maskz_getmant_ss() {
58045 let a = _mm_set1_ps(20.);
58046 let b = _mm_set1_ps(10.);
58047 let r = _mm_maskz_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a, b);
58048 let e = _mm_set_ps(20., 20., 20., 0.);
58049 assert_eq_m128(r, e);
58050 let r = _mm_maskz_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b11111111, a, b);
58051 let e = _mm_set_ps(20., 20., 20., 1.25);
58052 assert_eq_m128(r, e);
58053 }
58054
58055 #[simd_test(enable = "avx512f")]
58056 unsafe fn test_mm_getmant_sd() {
58057 let a = _mm_set1_pd(20.);
58058 let b = _mm_set1_pd(10.);
58059 let r = _mm_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, b);
58060 let e = _mm_set_pd(20., 1.25);
58061 assert_eq_m128d(r, e);
58062 }
58063
58064 #[simd_test(enable = "avx512f")]
58065 unsafe fn test_mm_mask_getmant_sd() {
58066 let a = _mm_set1_pd(20.);
58067 let b = _mm_set1_pd(10.);
58068 let r = _mm_mask_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a, b);
58069 let e = _mm_set_pd(20., 20.);
58070 assert_eq_m128d(r, e);
58071 let r = _mm_mask_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b11111111, a, b);
58072 let e = _mm_set_pd(20., 1.25);
58073 assert_eq_m128d(r, e);
58074 }
58075
58076 #[simd_test(enable = "avx512f")]
58077 unsafe fn test_mm_maskz_getmant_sd() {
58078 let a = _mm_set1_pd(20.);
58079 let b = _mm_set1_pd(10.);
58080 let r = _mm_maskz_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a, b);
58081 let e = _mm_set_pd(20., 0.);
58082 assert_eq_m128d(r, e);
58083 let r = _mm_maskz_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b11111111, a, b);
58084 let e = _mm_set_pd(20., 1.25);
58085 assert_eq_m128d(r, e);
58086 }
58087
58088 #[simd_test(enable = "avx512f")]
58089 unsafe fn test_mm_roundscale_ss() {
58090 let a = _mm_set1_ps(2.2);
58091 let b = _mm_set1_ps(1.1);
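// IMM8 = 0 keeps no fraction bits, i.e. rounds to the nearest integer: 1.1 -> 1.0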
58092 let r = _mm_roundscale_ss::<0>(a, b);
58093 let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
58094 assert_eq_m128(r, e);
58095 }
58096
58097 #[simd_test(enable = "avx512f")]
58098 unsafe fn test_mm_mask_roundscale_ss() {
58099 let a = _mm_set1_ps(2.2);
58100 let b = _mm_set1_ps(1.1);
58101 let r = _mm_mask_roundscale_ss::<0>(a, 0, a, b);
58102 let e = _mm_set_ps(2.2, 2.2, 2.2, 2.2);
58103 assert_eq_m128(r, e);
58104 let r = _mm_mask_roundscale_ss::<0>(a, 0b11111111, a, b);
58105 let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
58106 assert_eq_m128(r, e);
58107 }
58108
58109 #[simd_test(enable = "avx512f")]
58110 unsafe fn test_mm_maskz_roundscale_ss() {
58111 let a = _mm_set1_ps(2.2);
58112 let b = _mm_set1_ps(1.1);
58113 let r = _mm_maskz_roundscale_ss::<0>(0, a, b);
58114 let e = _mm_set_ps(2.2, 2.2, 2.2, 0.0);
58115 assert_eq_m128(r, e);
58116 let r = _mm_maskz_roundscale_ss::<0>(0b11111111, a, b);
58117 let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
58118 assert_eq_m128(r, e);
58119 }
58120
58121 #[simd_test(enable = "avx512f")]
58122 unsafe fn test_mm_roundscale_sd() {
58123 let a = _mm_set1_pd(2.2);
58124 let b = _mm_set1_pd(1.1);
58125 let r = _mm_roundscale_sd::<0>(a, b);
58126 let e = _mm_set_pd(2.2, 1.0);
58127 assert_eq_m128d(r, e);
58128 }
58129
58130 #[simd_test(enable = "avx512f")]
58131 unsafe fn test_mm_mask_roundscale_sd() {
58132 let a = _mm_set1_pd(2.2);
58133 let b = _mm_set1_pd(1.1);
58134 let r = _mm_mask_roundscale_sd::<0>(a, 0, a, b);
58135 let e = _mm_set_pd(2.2, 2.2);
58136 assert_eq_m128d(r, e);
58137 let r = _mm_mask_roundscale_sd::<0>(a, 0b11111111, a, b);
58138 let e = _mm_set_pd(2.2, 1.0);
58139 assert_eq_m128d(r, e);
58140 }
58141
58142 #[simd_test(enable = "avx512f")]
58143 unsafe fn test_mm_maskz_roundscale_sd() {
58144 let a = _mm_set1_pd(2.2);
58145 let b = _mm_set1_pd(1.1);
58146 let r = _mm_maskz_roundscale_sd::<0>(0, a, b);
58147 let e = _mm_set_pd(2.2, 0.0);
58148 assert_eq_m128d(r, e);
58149 let r = _mm_maskz_roundscale_sd::<0>(0b11111111, a, b);
58150 let e = _mm_set_pd(2.2, 1.0);
58151 assert_eq_m128d(r, e);
58152 }
58153
58154 #[simd_test(enable = "avx512f")]
58155 unsafe fn test_mm_scalef_ss() {
58156 let a = _mm_set1_ps(1.);
58157 let b = _mm_set1_ps(3.);
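// scalef computes a[0] * 2^floor(b[0]) in the low lane: 1. * 2^3 = 8.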
58158 let r = _mm_scalef_ss(a, b);
58159 let e = _mm_set_ps(1., 1., 1., 8.);
58160 assert_eq_m128(r, e);
58161 }
58162
58163 #[simd_test(enable = "avx512f")]
58164 unsafe fn test_mm_mask_scalef_ss() {
58165 let a = _mm_set1_ps(1.);
58166 let b = _mm_set1_ps(3.);
58167 let r = _mm_mask_scalef_ss(a, 0, a, b);
58168 let e = _mm_set_ps(1., 1., 1., 1.);
58169 assert_eq_m128(r, e);
58170 let r = _mm_mask_scalef_ss(a, 0b11111111, a, b);
58171 let e = _mm_set_ps(1., 1., 1., 8.);
58172 assert_eq_m128(r, e);
58173 }
58174
58175 #[simd_test(enable = "avx512f")]
58176 unsafe fn test_mm_maskz_scalef_ss() {
58177 let a = _mm_set1_ps(1.);
58178 let b = _mm_set1_ps(3.);
58179 let r = _mm_maskz_scalef_ss(0, a, b);
58180 let e = _mm_set_ps(1., 1., 1., 0.);
58181 assert_eq_m128(r, e);
58182 let r = _mm_maskz_scalef_ss(0b11111111, a, b);
58183 let e = _mm_set_ps(1., 1., 1., 8.);
58184 assert_eq_m128(r, e);
58185 }
58186
58187 #[simd_test(enable = "avx512f")]
58188 unsafe fn test_mm_scalef_sd() {
58189 let a = _mm_set1_pd(1.);
58190 let b = _mm_set1_pd(3.);
58191 let r = _mm_scalef_sd(a, b);
58192 let e = _mm_set_pd(1., 8.);
58193 assert_eq_m128d(r, e);
58194 }
58195
58196 #[simd_test(enable = "avx512f")]
58197 unsafe fn test_mm_mask_scalef_sd() {
58198 let a = _mm_set1_pd(1.);
58199 let b = _mm_set1_pd(3.);
58200 let r = _mm_mask_scalef_sd(a, 0, a, b);
58201 let e = _mm_set_pd(1., 1.);
58202 assert_eq_m128d(r, e);
58203 let r = _mm_mask_scalef_sd(a, 0b11111111, a, b);
58204 let e = _mm_set_pd(1., 8.);
58205 assert_eq_m128d(r, e);
58206 }
58207
58208 #[simd_test(enable = "avx512f")]
58209 unsafe fn test_mm_maskz_scalef_sd() {
58210 let a = _mm_set1_pd(1.);
58211 let b = _mm_set1_pd(3.);
58212 let r = _mm_maskz_scalef_sd(0, a, b);
58213 let e = _mm_set_pd(1., 0.);
58214 assert_eq_m128d(r, e);
58215 let r = _mm_maskz_scalef_sd(0b11111111, a, b);
58216 let e = _mm_set_pd(1., 8.);
58217 assert_eq_m128d(r, e);
58218 }
58219
58220 #[simd_test(enable = "avx512f")]
58221 unsafe fn test_mm_mask_fmadd_ss() {
58222 let a = _mm_set1_ps(1.);
58223 let b = _mm_set1_ps(2.);
58224 let c = _mm_set1_ps(3.);
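// low lane: a * b + c = 1. * 2. + 3. = 5. when the mask bit is set, otherwise a passes through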
58225 let r = _mm_mask_fmadd_ss(a, 0, b, c);
58226 assert_eq_m128(r, a);
58227 let r = _mm_mask_fmadd_ss(a, 0b11111111, b, c);
58228 let e = _mm_set_ps(1., 1., 1., 5.);
58229 assert_eq_m128(r, e);
58230 }
58231
58232 #[simd_test(enable = "avx512f")]
58233 unsafe fn test_mm_maskz_fmadd_ss() {
58234 let a = _mm_set1_ps(1.);
58235 let b = _mm_set1_ps(2.);
58236 let c = _mm_set1_ps(3.);
58237 let r = _mm_maskz_fmadd_ss(0, a, b, c);
58238 let e = _mm_set_ps(1., 1., 1., 0.);
58239 assert_eq_m128(r, e);
58240 let r = _mm_maskz_fmadd_ss(0b11111111, a, b, c);
58241 let e = _mm_set_ps(1., 1., 1., 5.);
58242 assert_eq_m128(r, e);
58243 }
58244
58245 #[simd_test(enable = "avx512f")]
58246 unsafe fn test_mm_mask3_fmadd_ss() {
58247 let a = _mm_set1_ps(1.);
58248 let b = _mm_set1_ps(2.);
58249 let c = _mm_set1_ps(3.);
58250 let r = _mm_mask3_fmadd_ss(a, b, c, 0);
58251 assert_eq_m128(r, c);
58252 let r = _mm_mask3_fmadd_ss(a, b, c, 0b11111111);
58253 let e = _mm_set_ps(3., 3., 3., 5.);
58254 assert_eq_m128(r, e);
58255 }
58256
58257 #[simd_test(enable = "avx512f")]
58258 unsafe fn test_mm_mask_fmadd_sd() {
58259 let a = _mm_set1_pd(1.);
58260 let b = _mm_set1_pd(2.);
58261 let c = _mm_set1_pd(3.);
58262 let r = _mm_mask_fmadd_sd(a, 0, b, c);
58263 assert_eq_m128d(r, a);
58264 let r = _mm_mask_fmadd_sd(a, 0b11111111, b, c);
58265 let e = _mm_set_pd(1., 5.);
58266 assert_eq_m128d(r, e);
58267 }
58268
58269 #[simd_test(enable = "avx512f")]
58270 unsafe fn test_mm_maskz_fmadd_sd() {
58271 let a = _mm_set1_pd(1.);
58272 let b = _mm_set1_pd(2.);
58273 let c = _mm_set1_pd(3.);
58274 let r = _mm_maskz_fmadd_sd(0, a, b, c);
58275 let e = _mm_set_pd(1., 0.);
58276 assert_eq_m128d(r, e);
58277 let r = _mm_maskz_fmadd_sd(0b11111111, a, b, c);
58278 let e = _mm_set_pd(1., 5.);
58279 assert_eq_m128d(r, e);
58280 }
58281
58282 #[simd_test(enable = "avx512f")]
58283 unsafe fn test_mm_mask3_fmadd_sd() {
58284 let a = _mm_set1_pd(1.);
58285 let b = _mm_set1_pd(2.);
58286 let c = _mm_set1_pd(3.);
58287 let r = _mm_mask3_fmadd_sd(a, b, c, 0);
58288 assert_eq_m128d(r, c);
58289 let r = _mm_mask3_fmadd_sd(a, b, c, 0b11111111);
58290 let e = _mm_set_pd(3., 5.);
58291 assert_eq_m128d(r, e);
58292 }
58293
58294 #[simd_test(enable = "avx512f")]
58295 unsafe fn test_mm_mask_fmsub_ss() {
58296 let a = _mm_set1_ps(1.);
58297 let b = _mm_set1_ps(2.);
58298 let c = _mm_set1_ps(3.);
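// low lane: a * b - c = 1. * 2. - 3. = -1. when the mask bit is set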
58299 let r = _mm_mask_fmsub_ss(a, 0, b, c);
58300 assert_eq_m128(r, a);
58301 let r = _mm_mask_fmsub_ss(a, 0b11111111, b, c);
58302 let e = _mm_set_ps(1., 1., 1., -1.);
58303 assert_eq_m128(r, e);
58304 }
58305
58306 #[simd_test(enable = "avx512f")]
58307 unsafe fn test_mm_maskz_fmsub_ss() {
58308 let a = _mm_set1_ps(1.);
58309 let b = _mm_set1_ps(2.);
58310 let c = _mm_set1_ps(3.);
58311 let r = _mm_maskz_fmsub_ss(0, a, b, c);
58312 let e = _mm_set_ps(1., 1., 1., 0.);
58313 assert_eq_m128(r, e);
58314 let r = _mm_maskz_fmsub_ss(0b11111111, a, b, c);
58315 let e = _mm_set_ps(1., 1., 1., -1.);
58316 assert_eq_m128(r, e);
58317 }
58318
58319 #[simd_test(enable = "avx512f")]
58320 unsafe fn test_mm_mask3_fmsub_ss() {
58321 let a = _mm_set1_ps(1.);
58322 let b = _mm_set1_ps(2.);
58323 let c = _mm_set1_ps(3.);
58324 let r = _mm_mask3_fmsub_ss(a, b, c, 0);
58325 assert_eq_m128(r, c);
58326 let r = _mm_mask3_fmsub_ss(a, b, c, 0b11111111);
58327 let e = _mm_set_ps(3., 3., 3., -1.);
58328 assert_eq_m128(r, e);
58329 }
58330
58331 #[simd_test(enable = "avx512f")]
58332 unsafe fn test_mm_mask_fmsub_sd() {
58333 let a = _mm_set1_pd(1.);
58334 let b = _mm_set1_pd(2.);
58335 let c = _mm_set1_pd(3.);
58336 let r = _mm_mask_fmsub_sd(a, 0, b, c);
58337 assert_eq_m128d(r, a);
58338 let r = _mm_mask_fmsub_sd(a, 0b11111111, b, c);
58339 let e = _mm_set_pd(1., -1.);
58340 assert_eq_m128d(r, e);
58341 }
58342
58343 #[simd_test(enable = "avx512f")]
58344 unsafe fn test_mm_maskz_fmsub_sd() {
58345 let a = _mm_set1_pd(1.);
58346 let b = _mm_set1_pd(2.);
58347 let c = _mm_set1_pd(3.);
58348 let r = _mm_maskz_fmsub_sd(0, a, b, c);
58349 let e = _mm_set_pd(1., 0.);
58350 assert_eq_m128d(r, e);
58351 let r = _mm_maskz_fmsub_sd(0b11111111, a, b, c);
58352 let e = _mm_set_pd(1., -1.);
58353 assert_eq_m128d(r, e);
58354 }
58355
58356 #[simd_test(enable = "avx512f")]
58357 unsafe fn test_mm_mask3_fmsub_sd() {
58358 let a = _mm_set1_pd(1.);
58359 let b = _mm_set1_pd(2.);
58360 let c = _mm_set1_pd(3.);
58361 let r = _mm_mask3_fmsub_sd(a, b, c, 0);
58362 assert_eq_m128d(r, c);
58363 let r = _mm_mask3_fmsub_sd(a, b, c, 0b11111111);
58364 let e = _mm_set_pd(3., -1.);
58365 assert_eq_m128d(r, e);
58366 }
58367
58368 #[simd_test(enable = "avx512f")]
58369 unsafe fn test_mm_mask_fnmadd_ss() {
58370 let a = _mm_set1_ps(1.);
58371 let b = _mm_set1_ps(2.);
58372 let c = _mm_set1_ps(3.);
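// low lane: -(a * b) + c = -2. + 3. = 1. when the mask bit is set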
58373 let r = _mm_mask_fnmadd_ss(a, 0, b, c);
58374 assert_eq_m128(r, a);
58375 let r = _mm_mask_fnmadd_ss(a, 0b11111111, b, c);
58376 let e = _mm_set_ps(1., 1., 1., 1.);
58377 assert_eq_m128(r, e);
58378 }
58379
58380 #[simd_test(enable = "avx512f")]
58381 unsafe fn test_mm_maskz_fnmadd_ss() {
58382 let a = _mm_set1_ps(1.);
58383 let b = _mm_set1_ps(2.);
58384 let c = _mm_set1_ps(3.);
58385 let r = _mm_maskz_fnmadd_ss(0, a, b, c);
58386 let e = _mm_set_ps(1., 1., 1., 0.);
58387 assert_eq_m128(r, e);
58388 let r = _mm_maskz_fnmadd_ss(0b11111111, a, b, c);
58389 let e = _mm_set_ps(1., 1., 1., 1.);
58390 assert_eq_m128(r, e);
58391 }
58392
58393 #[simd_test(enable = "avx512f")]
58394 unsafe fn test_mm_mask3_fnmadd_ss() {
58395 let a = _mm_set1_ps(1.);
58396 let b = _mm_set1_ps(2.);
58397 let c = _mm_set1_ps(3.);
58398 let r = _mm_mask3_fnmadd_ss(a, b, c, 0);
58399 assert_eq_m128(r, c);
58400 let r = _mm_mask3_fnmadd_ss(a, b, c, 0b11111111);
58401 let e = _mm_set_ps(3., 3., 3., 1.);
58402 assert_eq_m128(r, e);
58403 }
58404
58405 #[simd_test(enable = "avx512f")]
58406 unsafe fn test_mm_mask_fnmadd_sd() {
58407 let a = _mm_set1_pd(1.);
58408 let b = _mm_set1_pd(2.);
58409 let c = _mm_set1_pd(3.);
58410 let r = _mm_mask_fnmadd_sd(a, 0, b, c);
58411 assert_eq_m128d(r, a);
58412 let r = _mm_mask_fnmadd_sd(a, 0b11111111, b, c);
58413 let e = _mm_set_pd(1., 1.);
58414 assert_eq_m128d(r, e);
58415 }
58416
58417 #[simd_test(enable = "avx512f")]
58418 unsafe fn test_mm_maskz_fnmadd_sd() {
58419 let a = _mm_set1_pd(1.);
58420 let b = _mm_set1_pd(2.);
58421 let c = _mm_set1_pd(3.);
58422 let r = _mm_maskz_fnmadd_sd(0, a, b, c);
58423 let e = _mm_set_pd(1., 0.);
58424 assert_eq_m128d(r, e);
58425 let r = _mm_maskz_fnmadd_sd(0b11111111, a, b, c);
58426 let e = _mm_set_pd(1., 1.);
58427 assert_eq_m128d(r, e);
58428 }
58429
58430 #[simd_test(enable = "avx512f")]
58431 unsafe fn test_mm_mask3_fnmadd_sd() {
58432 let a = _mm_set1_pd(1.);
58433 let b = _mm_set1_pd(2.);
58434 let c = _mm_set1_pd(3.);
58435 let r = _mm_mask3_fnmadd_sd(a, b, c, 0);
58436 assert_eq_m128d(r, c);
58437 let r = _mm_mask3_fnmadd_sd(a, b, c, 0b11111111);
58438 let e = _mm_set_pd(3., 1.);
58439 assert_eq_m128d(r, e);
58440 }
58441
58442 #[simd_test(enable = "avx512f")]
58443 unsafe fn test_mm_mask_fnmsub_ss() {
58444 let a = _mm_set1_ps(1.);
58445 let b = _mm_set1_ps(2.);
58446 let c = _mm_set1_ps(3.);
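// low lane: -(a * b) - c = -2. - 3. = -5. when the mask bit is set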
58447 let r = _mm_mask_fnmsub_ss(a, 0, b, c);
58448 assert_eq_m128(r, a);
58449 let r = _mm_mask_fnmsub_ss(a, 0b11111111, b, c);
58450 let e = _mm_set_ps(1., 1., 1., -5.);
58451 assert_eq_m128(r, e);
58452 }
58453
58454 #[simd_test(enable = "avx512f")]
58455 unsafe fn test_mm_maskz_fnmsub_ss() {
58456 let a = _mm_set1_ps(1.);
58457 let b = _mm_set1_ps(2.);
58458 let c = _mm_set1_ps(3.);
58459 let r = _mm_maskz_fnmsub_ss(0, a, b, c);
58460 let e = _mm_set_ps(1., 1., 1., 0.);
58461 assert_eq_m128(r, e);
58462 let r = _mm_maskz_fnmsub_ss(0b11111111, a, b, c);
58463 let e = _mm_set_ps(1., 1., 1., -5.);
58464 assert_eq_m128(r, e);
58465 }
58466
58467 #[simd_test(enable = "avx512f")]
58468 unsafe fn test_mm_mask3_fnmsub_ss() {
58469 let a = _mm_set1_ps(1.);
58470 let b = _mm_set1_ps(2.);
58471 let c = _mm_set1_ps(3.);
58472 let r = _mm_mask3_fnmsub_ss(a, b, c, 0);
58473 assert_eq_m128(r, c);
58474 let r = _mm_mask3_fnmsub_ss(a, b, c, 0b11111111);
58475 let e = _mm_set_ps(3., 3., 3., -5.);
58476 assert_eq_m128(r, e);
58477 }
58478
58479 #[simd_test(enable = "avx512f")]
58480 unsafe fn test_mm_mask_fnmsub_sd() {
58481 let a = _mm_set1_pd(1.);
58482 let b = _mm_set1_pd(2.);
58483 let c = _mm_set1_pd(3.);
58484 let r = _mm_mask_fnmsub_sd(a, 0, b, c);
58485 assert_eq_m128d(r, a);
58486 let r = _mm_mask_fnmsub_sd(a, 0b11111111, b, c);
58487 let e = _mm_set_pd(1., -5.);
58488 assert_eq_m128d(r, e);
58489 }
58490
58491 #[simd_test(enable = "avx512f")]
58492 unsafe fn test_mm_maskz_fnmsub_sd() {
58493 let a = _mm_set1_pd(1.);
58494 let b = _mm_set1_pd(2.);
58495 let c = _mm_set1_pd(3.);
58496 let r = _mm_maskz_fnmsub_sd(0, a, b, c);
58497 let e = _mm_set_pd(1., 0.);
58498 assert_eq_m128d(r, e);
58499 let r = _mm_maskz_fnmsub_sd(0b11111111, a, b, c);
58500 let e = _mm_set_pd(1., -5.);
58501 assert_eq_m128d(r, e);
58502 }
58503
58504 #[simd_test(enable = "avx512f")]
58505 unsafe fn test_mm_mask3_fnmsub_sd() {
58506 let a = _mm_set1_pd(1.);
58507 let b = _mm_set1_pd(2.);
58508 let c = _mm_set1_pd(3.);
58509 let r = _mm_mask3_fnmsub_sd(a, b, c, 0);
58510 assert_eq_m128d(r, c);
58511 let r = _mm_mask3_fnmsub_sd(a, b, c, 0b11111111);
58512 let e = _mm_set_pd(3., -5.);
58513 assert_eq_m128d(r, e);
58514 }
58515
58516 #[simd_test(enable = "avx512f")]
58517 unsafe fn test_mm_add_round_ss() {
58518 let a = _mm_set_ps(1., 2., 10., 20.);
58519 let b = _mm_set_ps(3., 4., 30., 40.);
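// _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC: truncate toward zero with exceptions suppressed (SAE); 20. + 40. = 60. is exact either way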
58520 let r = _mm_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58521 let e = _mm_set_ps(1., 2., 10., 60.);
58522 assert_eq_m128(r, e);
58523 }
58524
58525 #[simd_test(enable = "avx512f")]
58526 unsafe fn test_mm_mask_add_round_ss() {
58527 let src = _mm_set_ps(10., 11., 100., 110.);
58528 let a = _mm_set_ps(1., 2., 10., 20.);
58529 let b = _mm_set_ps(3., 4., 30., 40.);
58530 let r = _mm_mask_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
58531 let e = _mm_set_ps(1., 2., 10., 110.);
58532 assert_eq_m128(r, e);
58533 let r = _mm_mask_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58534 src, 0b11111111, a, b,
58535 );
58536 let e = _mm_set_ps(1., 2., 10., 60.);
58537 assert_eq_m128(r, e);
58538 }
58539
58540 #[simd_test(enable = "avx512f")]
58541 unsafe fn test_mm_maskz_add_round_ss() {
58542 let a = _mm_set_ps(1., 2., 10., 20.);
58543 let b = _mm_set_ps(3., 4., 30., 40.);
58544 let r = _mm_maskz_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
58545 let e = _mm_set_ps(1., 2., 10., 0.);
58546 assert_eq_m128(r, e);
58547 let r =
58548 _mm_maskz_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
58549 let e = _mm_set_ps(1., 2., 10., 60.);
58550 assert_eq_m128(r, e);
58551 }
58552
58553 #[simd_test(enable = "avx512f")]
58554 unsafe fn test_mm_add_round_sd() {
58555 let a = _mm_set_pd(1., 2.);
58556 let b = _mm_set_pd(3., 4.);
58557 let r = _mm_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58558 let e = _mm_set_pd(1., 6.);
58559 assert_eq_m128d(r, e);
58560 }
58561
58562 #[simd_test(enable = "avx512f")]
58563 unsafe fn test_mm_mask_add_round_sd() {
58564 let src = _mm_set_pd(10., 11.);
58565 let a = _mm_set_pd(1., 2.);
58566 let b = _mm_set_pd(3., 4.);
58567 let r = _mm_mask_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
58568 let e = _mm_set_pd(1., 11.);
58569 assert_eq_m128d(r, e);
58570 let r = _mm_mask_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58571 src, 0b11111111, a, b,
58572 );
58573 let e = _mm_set_pd(1., 6.);
58574 assert_eq_m128d(r, e);
58575 }
58576
58577 #[simd_test(enable = "avx512f")]
58578 unsafe fn test_mm_maskz_add_round_sd() {
58579 let a = _mm_set_pd(1., 2.);
58580 let b = _mm_set_pd(3., 4.);
58581 let r = _mm_maskz_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
58582 let e = _mm_set_pd(1., 0.);
58583 assert_eq_m128d(r, e);
58584 let r =
58585 _mm_maskz_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
58586 let e = _mm_set_pd(1., 6.);
58587 assert_eq_m128d(r, e);
58588 }
58589
58590 #[simd_test(enable = "avx512f")]
58591 unsafe fn test_mm_sub_round_ss() {
58592 let a = _mm_set_ps(1., 2., 10., 20.);
58593 let b = _mm_set_ps(3., 4., 30., 40.);
58594 let r = _mm_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58595 let e = _mm_set_ps(1., 2., 10., -20.);
58596 assert_eq_m128(r, e);
58597 }
58598
58599 #[simd_test(enable = "avx512f")]
58600 unsafe fn test_mm_mask_sub_round_ss() {
58601 let src = _mm_set_ps(10., 11., 100., 110.);
58602 let a = _mm_set_ps(1., 2., 10., 20.);
58603 let b = _mm_set_ps(3., 4., 30., 40.);
58604 let r = _mm_mask_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
58605 let e = _mm_set_ps(1., 2., 10., 110.);
58606 assert_eq_m128(r, e);
58607 let r = _mm_mask_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58608 src, 0b11111111, a, b,
58609 );
58610 let e = _mm_set_ps(1., 2., 10., -20.);
58611 assert_eq_m128(r, e);
58612 }
58613
58614 #[simd_test(enable = "avx512f")]
58615 unsafe fn test_mm_maskz_sub_round_ss() {
58616 let a = _mm_set_ps(1., 2., 10., 20.);
58617 let b = _mm_set_ps(3., 4., 30., 40.);
58618 let r = _mm_maskz_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
58619 let e = _mm_set_ps(1., 2., 10., 0.);
58620 assert_eq_m128(r, e);
58621 let r =
58622 _mm_maskz_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
58623 let e = _mm_set_ps(1., 2., 10., -20.);
58624 assert_eq_m128(r, e);
58625 }
58626
58627 #[simd_test(enable = "avx512f")]
58628 unsafe fn test_mm_sub_round_sd() {
58629 let a = _mm_set_pd(1., 2.);
58630 let b = _mm_set_pd(3., 4.);
58631 let r = _mm_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58632 let e = _mm_set_pd(1., -2.);
58633 assert_eq_m128d(r, e);
58634 }
58635
58636 #[simd_test(enable = "avx512f")]
58637 unsafe fn test_mm_mask_sub_round_sd() {
58638 let src = _mm_set_pd(10., 11.);
58639 let a = _mm_set_pd(1., 2.);
58640 let b = _mm_set_pd(3., 4.);
58641 let r = _mm_mask_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
58642 let e = _mm_set_pd(1., 11.);
58643 assert_eq_m128d(r, e);
58644 let r = _mm_mask_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58645 src, 0b11111111, a, b,
58646 );
58647 let e = _mm_set_pd(1., -2.);
58648 assert_eq_m128d(r, e);
58649 }
58650
58651 #[simd_test(enable = "avx512f")]
58652 unsafe fn test_mm_maskz_sub_round_sd() {
58653 let a = _mm_set_pd(1., 2.);
58654 let b = _mm_set_pd(3., 4.);
58655 let r = _mm_maskz_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
58656 let e = _mm_set_pd(1., 0.);
58657 assert_eq_m128d(r, e);
58658 let r =
58659 _mm_maskz_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
58660 let e = _mm_set_pd(1., -2.);
58661 assert_eq_m128d(r, e);
58662 }
58663
58664 #[simd_test(enable = "avx512f")]
58665 unsafe fn test_mm_mul_round_ss() {
58666 let a = _mm_set_ps(1., 2., 10., 20.);
58667 let b = _mm_set_ps(3., 4., 30., 40.);
58668 let r = _mm_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58669 let e = _mm_set_ps(1., 2., 10., 800.);
58670 assert_eq_m128(r, e);
58671 }
58672
58673 #[simd_test(enable = "avx512f")]
58674 unsafe fn test_mm_mask_mul_round_ss() {
58675 let src = _mm_set_ps(10., 11., 100., 110.);
58676 let a = _mm_set_ps(1., 2., 10., 20.);
58677 let b = _mm_set_ps(3., 4., 30., 40.);
58678 let r = _mm_mask_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
58679 let e = _mm_set_ps(1., 2., 10., 110.);
58680 assert_eq_m128(r, e);
58681 let r = _mm_mask_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58682 src, 0b11111111, a, b,
58683 );
58684 let e = _mm_set_ps(1., 2., 10., 800.);
58685 assert_eq_m128(r, e);
58686 }
58687
58688 #[simd_test(enable = "avx512f")]
58689 unsafe fn test_mm_maskz_mul_round_ss() {
58690 let a = _mm_set_ps(1., 2., 10., 20.);
58691 let b = _mm_set_ps(3., 4., 30., 40.);
58692 let r = _mm_maskz_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
58693 let e = _mm_set_ps(1., 2., 10., 0.);
58694 assert_eq_m128(r, e);
58695 let r =
58696 _mm_maskz_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
58697 let e = _mm_set_ps(1., 2., 10., 800.);
58698 assert_eq_m128(r, e);
58699 }
58700
58701 #[simd_test(enable = "avx512f")]
58702 unsafe fn test_mm_mul_round_sd() {
58703 let a = _mm_set_pd(1., 2.);
58704 let b = _mm_set_pd(3., 4.);
58705 let r = _mm_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58706 let e = _mm_set_pd(1., 8.);
58707 assert_eq_m128d(r, e);
58708 }
58709
58710 #[simd_test(enable = "avx512f")]
58711 unsafe fn test_mm_mask_mul_round_sd() {
58712 let src = _mm_set_pd(10., 11.);
58713 let a = _mm_set_pd(1., 2.);
58714 let b = _mm_set_pd(3., 4.);
58715 let r = _mm_mask_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
58716 let e = _mm_set_pd(1., 11.);
58717 assert_eq_m128d(r, e);
58718 let r = _mm_mask_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58719 src, 0b11111111, a, b,
58720 );
58721 let e = _mm_set_pd(1., 8.);
58722 assert_eq_m128d(r, e);
58723 }
58724
58725 #[simd_test(enable = "avx512f")]
58726 unsafe fn test_mm_maskz_mul_round_sd() {
58727 let a = _mm_set_pd(1., 2.);
58728 let b = _mm_set_pd(3., 4.);
58729 let r = _mm_maskz_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
58730 let e = _mm_set_pd(1., 0.);
58731 assert_eq_m128d(r, e);
58732 let r =
58733 _mm_maskz_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
58734 let e = _mm_set_pd(1., 8.);
58735 assert_eq_m128d(r, e);
58736 }
58737
58738 #[simd_test(enable = "avx512f")]
58739 unsafe fn test_mm_div_round_ss() {
58740 let a = _mm_set_ps(1., 2., 10., 20.);
58741 let b = _mm_set_ps(3., 4., 30., 40.);
58742 let r = _mm_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58743 let e = _mm_set_ps(1., 2., 10., 0.5);
58744 assert_eq_m128(r, e);
58745 }
58746
58747 #[simd_test(enable = "avx512f")]
58748 unsafe fn test_mm_mask_div_round_ss() {
58749 let src = _mm_set_ps(10., 11., 100., 110.);
58750 let a = _mm_set_ps(1., 2., 10., 20.);
58751 let b = _mm_set_ps(3., 4., 30., 40.);
58752 let r = _mm_mask_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
58753 let e = _mm_set_ps(1., 2., 10., 110.);
58754 assert_eq_m128(r, e);
58755 let r = _mm_mask_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58756 src, 0b11111111, a, b,
58757 );
58758 let e = _mm_set_ps(1., 2., 10., 0.5);
58759 assert_eq_m128(r, e);
58760 }
58761
58762 #[simd_test(enable = "avx512f")]
58763 unsafe fn test_mm_maskz_div_round_ss() {
58764 let a = _mm_set_ps(1., 2., 10., 20.);
58765 let b = _mm_set_ps(3., 4., 30., 40.);
58766 let r = _mm_maskz_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
58767 let e = _mm_set_ps(1., 2., 10., 0.);
58768 assert_eq_m128(r, e);
58769 let r =
58770 _mm_maskz_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
58771 let e = _mm_set_ps(1., 2., 10., 0.5);
58772 assert_eq_m128(r, e);
58773 }
58774
58775 #[simd_test(enable = "avx512f")]
58776 unsafe fn test_mm_div_round_sd() {
58777 let a = _mm_set_pd(1., 2.);
58778 let b = _mm_set_pd(3., 4.);
58779 let r = _mm_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58780 let e = _mm_set_pd(1., 0.5);
58781 assert_eq_m128d(r, e);
58782 }
58783
58784 #[simd_test(enable = "avx512f")]
58785 unsafe fn test_mm_mask_div_round_sd() {
58786 let src = _mm_set_pd(10., 11.);
58787 let a = _mm_set_pd(1., 2.);
58788 let b = _mm_set_pd(3., 4.);
58789 let r = _mm_mask_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
58790 let e = _mm_set_pd(1., 11.);
58791 assert_eq_m128d(r, e);
58792 let r = _mm_mask_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58793 src, 0b11111111, a, b,
58794 );
58795 let e = _mm_set_pd(1., 0.5);
58796 assert_eq_m128d(r, e);
58797 }
58798
58799 #[simd_test(enable = "avx512f")]
58800 unsafe fn test_mm_maskz_div_round_sd() {
58801 let a = _mm_set_pd(1., 2.);
58802 let b = _mm_set_pd(3., 4.);
58803 let r = _mm_maskz_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
58804 let e = _mm_set_pd(1., 0.);
58805 assert_eq_m128d(r, e);
58806 let r =
58807 _mm_maskz_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
58808 let e = _mm_set_pd(1., 0.5);
58809 assert_eq_m128d(r, e);
58810 }
58811
58812 #[simd_test(enable = "avx512f")]
58813 unsafe fn test_mm_max_round_ss() {
58814 let a = _mm_set_ps(0., 1., 2., 3.);
58815 let b = _mm_set_ps(4., 5., 6., 7.);
58816 let r = _mm_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b);
58817 let e = _mm_set_ps(0., 1., 2., 7.);
58818 assert_eq_m128(r, e);
58819 }
58820
58821 #[simd_test(enable = "avx512f")]
58822 unsafe fn test_mm_mask_max_round_ss() {
58823 let a = _mm_set_ps(0., 1., 2., 3.);
58824 let b = _mm_set_ps(4., 5., 6., 7.);
58825 let r = _mm_mask_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
58826 let e = _mm_set_ps(0., 1., 2., 3.);
58827 assert_eq_m128(r, e);
58828 let r = _mm_mask_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
58829 let e = _mm_set_ps(0., 1., 2., 7.);
58830 assert_eq_m128(r, e);
58831 }
58832
58833 #[simd_test(enable = "avx512f")]
58834 unsafe fn test_mm_maskz_max_round_ss() {
58835 let a = _mm_set_ps(0., 1., 2., 3.);
58836 let b = _mm_set_ps(4., 5., 6., 7.);
58837 let r = _mm_maskz_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
58838 let e = _mm_set_ps(0., 1., 2., 0.);
58839 assert_eq_m128(r, e);
58840 let r = _mm_maskz_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
58841 let e = _mm_set_ps(0., 1., 2., 7.);
58842 assert_eq_m128(r, e);
58843 }
58844
58845 #[simd_test(enable = "avx512f")]
58846 unsafe fn test_mm_max_round_sd() {
58847 let a = _mm_set_pd(0., 1.);
58848 let b = _mm_set_pd(2., 3.);
58849 let r = _mm_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
58850 let e = _mm_set_pd(0., 3.);
58851 assert_eq_m128d(r, e);
58852 }
58853
58854 #[simd_test(enable = "avx512f")]
58855 unsafe fn test_mm_mask_max_round_sd() {
58856 let a = _mm_set_pd(0., 1.);
58857 let b = _mm_set_pd(2., 3.);
58858 let r = _mm_mask_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
58859 let e = _mm_set_pd(0., 1.);
58860 assert_eq_m128d(r, e);
58861 let r = _mm_mask_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
58862 let e = _mm_set_pd(0., 3.);
58863 assert_eq_m128d(r, e);
58864 }
58865
58866 #[simd_test(enable = "avx512f")]
58867 unsafe fn test_mm_maskz_max_round_sd() {
58868 let a = _mm_set_pd(0., 1.);
58869 let b = _mm_set_pd(2., 3.);
58870 let r = _mm_maskz_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
58871 let e = _mm_set_pd(0., 0.);
58872 assert_eq_m128d(r, e);
58873 let r = _mm_maskz_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
58874 let e = _mm_set_pd(0., 3.);
58875 assert_eq_m128d(r, e);
58876 }
58877
58878 #[simd_test(enable = "avx512f")]
58879 unsafe fn test_mm_min_round_ss() {
58880 let a = _mm_set_ps(0., 1., 2., 3.);
58881 let b = _mm_set_ps(4., 5., 6., 7.);
58882 let r = _mm_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b);
58883 let e = _mm_set_ps(0., 1., 2., 3.);
58884 assert_eq_m128(r, e);
58885 }
58886
58887 #[simd_test(enable = "avx512f")]
58888 unsafe fn test_mm_mask_min_round_ss() {
58889 let a = _mm_set_ps(0., 1., 2., 3.);
58890 let b = _mm_set_ps(4., 5., 6., 7.);
58891 let r = _mm_mask_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
58892 let e = _mm_set_ps(0., 1., 2., 3.);
58893 assert_eq_m128(r, e);
58894 let r = _mm_mask_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
58895 let e = _mm_set_ps(0., 1., 2., 3.);
58896 assert_eq_m128(r, e);
58897 }
58898
58899 #[simd_test(enable = "avx512f")]
58900 unsafe fn test_mm_maskz_min_round_ss() {
58901 let a = _mm_set_ps(0., 1., 2., 3.);
58902 let b = _mm_set_ps(4., 5., 6., 7.);
58903 let r = _mm_maskz_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
58904 let e = _mm_set_ps(0., 1., 2., 0.);
58905 assert_eq_m128(r, e);
58906 let r = _mm_maskz_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
58907 let e = _mm_set_ps(0., 1., 2., 3.);
58908 assert_eq_m128(r, e);
58909 }
58910
58911 #[simd_test(enable = "avx512f")]
58912 unsafe fn test_mm_min_round_sd() {
58913 let a = _mm_set_pd(0., 1.);
58914 let b = _mm_set_pd(2., 3.);
58915 let r = _mm_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
58916 let e = _mm_set_pd(0., 1.);
58917 assert_eq_m128d(r, e);
58918 }
58919
58920 #[simd_test(enable = "avx512f")]
58921 unsafe fn test_mm_mask_min_round_sd() {
58922 let a = _mm_set_pd(0., 1.);
58923 let b = _mm_set_pd(2., 3.);
58924 let r = _mm_mask_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
58925 let e = _mm_set_pd(0., 1.);
58926 assert_eq_m128d(r, e);
58927 let r = _mm_mask_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
58928 let e = _mm_set_pd(0., 1.);
58929 assert_eq_m128d(r, e);
58930 }
58931
58932 #[simd_test(enable = "avx512f")]
58933 unsafe fn test_mm_maskz_min_round_sd() {
58934 let a = _mm_set_pd(0., 1.);
58935 let b = _mm_set_pd(2., 3.);
58936 let r = _mm_maskz_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
58937 let e = _mm_set_pd(0., 0.);
58938 assert_eq_m128d(r, e);
58939 let r = _mm_maskz_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
58940 let e = _mm_set_pd(0., 1.);
58941 assert_eq_m128d(r, e);
58942 }
58943
58944 #[simd_test(enable = "avx512f")]
58945 unsafe fn test_mm_sqrt_round_ss() {
58946 let a = _mm_set_ps(1., 2., 10., 20.);
58947 let b = _mm_set_ps(3., 4., 30., 4.);
58948 let r = _mm_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58949 let e = _mm_set_ps(1., 2., 10., 2.);
58950 assert_eq_m128(r, e);
58951 }
58952
58953 #[simd_test(enable = "avx512f")]
58954 unsafe fn test_mm_mask_sqrt_round_ss() {
58955 let src = _mm_set_ps(10., 11., 100., 110.);
58956 let a = _mm_set_ps(1., 2., 10., 20.);
58957 let b = _mm_set_ps(3., 4., 30., 4.);
58958 let r = _mm_mask_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
58959 let e = _mm_set_ps(1., 2., 10., 110.);
58960 assert_eq_m128(r, e);
58961 let r = _mm_mask_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58962 src, 0b11111111, a, b,
58963 );
58964 let e = _mm_set_ps(1., 2., 10., 2.);
58965 assert_eq_m128(r, e);
58966 }
58967
58968 #[simd_test(enable = "avx512f")]
58969 unsafe fn test_mm_maskz_sqrt_round_ss() {
58970 let a = _mm_set_ps(1., 2., 10., 20.);
58971 let b = _mm_set_ps(3., 4., 30., 4.);
58972 let r = _mm_maskz_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
58973 let e = _mm_set_ps(1., 2., 10., 0.);
58974 assert_eq_m128(r, e);
58975 let r =
58976 _mm_maskz_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
58977 let e = _mm_set_ps(1., 2., 10., 2.);
58978 assert_eq_m128(r, e);
58979 }
58980
58981 #[simd_test(enable = "avx512f")]
58982 unsafe fn test_mm_sqrt_round_sd() {
58983 let a = _mm_set_pd(1., 2.);
58984 let b = _mm_set_pd(3., 4.);
58985 let r = _mm_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58986 let e = _mm_set_pd(1., 2.);
58987 assert_eq_m128d(r, e);
58988 }
58989
58990 #[simd_test(enable = "avx512f")]
58991 unsafe fn test_mm_mask_sqrt_round_sd() {
58992 let src = _mm_set_pd(10., 11.);
58993 let a = _mm_set_pd(1., 2.);
58994 let b = _mm_set_pd(3., 4.);
58995 let r = _mm_mask_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
58996 let e = _mm_set_pd(1., 11.);
58997 assert_eq_m128d(r, e);
58998 let r = _mm_mask_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58999 src, 0b11111111, a, b,
59000 );
59001 let e = _mm_set_pd(1., 2.);
59002 assert_eq_m128d(r, e);
59003 }
59004
59005 #[simd_test(enable = "avx512f")]
59006 unsafe fn test_mm_maskz_sqrt_round_sd() {
59007 let a = _mm_set_pd(1., 2.);
59008 let b = _mm_set_pd(3., 4.);
59009 let r = _mm_maskz_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
59010 let e = _mm_set_pd(1., 0.);
59011 assert_eq_m128d(r, e);
59012 let r =
59013 _mm_maskz_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
59014 let e = _mm_set_pd(1., 2.);
59015 assert_eq_m128d(r, e);
59016 }
59017
59018 #[simd_test(enable = "avx512f")]
59019 unsafe fn test_mm_getexp_round_ss() {
59020 let a = _mm_set1_ps(2.);
59021 let b = _mm_set1_ps(3.);
59022 let r = _mm_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b);
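// getexp extracts floor(log2(|x|)) from the low element of b: floor(log2(3.0)) = 1, and the upper elements are copied from a.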
59023 let e = _mm_set_ps(2., 2., 2., 1.);
59024 assert_eq_m128(r, e);
59025 }
59026
59027 #[simd_test(enable = "avx512f")]
59028 unsafe fn test_mm_mask_getexp_round_ss() {
59029 let a = _mm_set1_ps(2.);
59030 let b = _mm_set1_ps(3.);
59031 let r = _mm_mask_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
59032 let e = _mm_set_ps(2., 2., 2., 2.);
59033 assert_eq_m128(r, e);
59034 let r = _mm_mask_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
59035 let e = _mm_set_ps(2., 2., 2., 1.);
59036 assert_eq_m128(r, e);
59037 }
59038
59039 #[simd_test(enable = "avx512f")]
59040 unsafe fn test_mm_maskz_getexp_round_ss() {
59041 let a = _mm_set1_ps(2.);
59042 let b = _mm_set1_ps(3.);
59043 let r = _mm_maskz_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
59044 let e = _mm_set_ps(2., 2., 2., 0.);
59045 assert_eq_m128(r, e);
59046 let r = _mm_maskz_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
59047 let e = _mm_set_ps(2., 2., 2., 1.);
59048 assert_eq_m128(r, e);
59049 }
59050
59051 #[simd_test(enable = "avx512f")]
59052 unsafe fn test_mm_getexp_round_sd() {
59053 let a = _mm_set1_pd(2.);
59054 let b = _mm_set1_pd(3.);
59055 let r = _mm_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
59056 let e = _mm_set_pd(2., 1.);
59057 assert_eq_m128d(r, e);
59058 }
59059
59060 #[simd_test(enable = "avx512f")]
59061 unsafe fn test_mm_mask_getexp_round_sd() {
59062 let a = _mm_set1_pd(2.);
59063 let b = _mm_set1_pd(3.);
59064 let r = _mm_mask_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
59065 let e = _mm_set_pd(2., 2.);
59066 assert_eq_m128d(r, e);
59067 let r = _mm_mask_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
59068 let e = _mm_set_pd(2., 1.);
59069 assert_eq_m128d(r, e);
59070 }
59071
59072 #[simd_test(enable = "avx512f")]
59073 unsafe fn test_mm_maskz_getexp_round_sd() {
59074 let a = _mm_set1_pd(2.);
59075 let b = _mm_set1_pd(3.);
59076 let r = _mm_maskz_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
59077 let e = _mm_set_pd(2., 0.);
59078 assert_eq_m128d(r, e);
59079 let r = _mm_maskz_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
59080 let e = _mm_set_pd(2., 1.);
59081 assert_eq_m128d(r, e);
59082 }
59083
59084 #[simd_test(enable = "avx512f")]
59085 unsafe fn test_mm_getmant_round_ss() {
59086 let a = _mm_set1_ps(20.);
59087 let b = _mm_set1_ps(10.);
59088 let r =
59089 _mm_getmant_round_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC, _MM_FROUND_CUR_DIRECTION>(
59090 a, b,
59091 );
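// With _MM_MANT_NORM_1_2 the mantissa is normalized into [1, 2): 10.0 = 1.25 * 2^3, so the low element becomes 1.25.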
59092 let e = _mm_set_ps(20., 20., 20., 1.25);
59093 assert_eq_m128(r, e);
59094 }
59095
59096 #[simd_test(enable = "avx512f")]
59097 unsafe fn test_mm_mask_getmant_round_ss() {
59098 let a = _mm_set1_ps(20.);
59099 let b = _mm_set1_ps(10.);
59100 let r = _mm_mask_getmant_round_ss::<
59101 _MM_MANT_NORM_1_2,
59102 _MM_MANT_SIGN_SRC,
59103 _MM_FROUND_CUR_DIRECTION,
59104 >(a, 0, a, b);
59105 let e = _mm_set_ps(20., 20., 20., 20.);
59106 assert_eq_m128(r, e);
59107 let r = _mm_mask_getmant_round_ss::<
59108 _MM_MANT_NORM_1_2,
59109 _MM_MANT_SIGN_SRC,
59110 _MM_FROUND_CUR_DIRECTION,
59111 >(a, 0b11111111, a, b);
59112 let e = _mm_set_ps(20., 20., 20., 1.25);
59113 assert_eq_m128(r, e);
59114 }
59115
59116 #[simd_test(enable = "avx512f")]
59117 unsafe fn test_mm_maskz_getmant_round_ss() {
59118 let a = _mm_set1_ps(20.);
59119 let b = _mm_set1_ps(10.);
59120 let r = _mm_maskz_getmant_round_ss::<
59121 _MM_MANT_NORM_1_2,
59122 _MM_MANT_SIGN_SRC,
59123 _MM_FROUND_CUR_DIRECTION,
59124 >(0, a, b);
59125 let e = _mm_set_ps(20., 20., 20., 0.);
59126 assert_eq_m128(r, e);
59127 let r = _mm_maskz_getmant_round_ss::<
59128 _MM_MANT_NORM_1_2,
59129 _MM_MANT_SIGN_SRC,
59130 _MM_FROUND_CUR_DIRECTION,
59131 >(0b11111111, a, b);
59132 let e = _mm_set_ps(20., 20., 20., 1.25);
59133 assert_eq_m128(r, e);
59134 }
59135
59136 #[simd_test(enable = "avx512f")]
59137 unsafe fn test_mm_getmant_round_sd() {
59138 let a = _mm_set1_pd(20.);
59139 let b = _mm_set1_pd(10.);
59140 let r =
59141 _mm_getmant_round_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC, _MM_FROUND_CUR_DIRECTION>(
59142 a, b,
59143 );
59144 let e = _mm_set_pd(20., 1.25);
59145 assert_eq_m128d(r, e);
59146 }
59147
59148 #[simd_test(enable = "avx512f")]
59149 unsafe fn test_mm_mask_getmant_round_sd() {
59150 let a = _mm_set1_pd(20.);
59151 let b = _mm_set1_pd(10.);
59152 let r = _mm_mask_getmant_round_sd::<
59153 _MM_MANT_NORM_1_2,
59154 _MM_MANT_SIGN_SRC,
59155 _MM_FROUND_CUR_DIRECTION,
59156 >(a, 0, a, b);
59157 let e = _mm_set_pd(20., 20.);
59158 assert_eq_m128d(r, e);
59159 let r = _mm_mask_getmant_round_sd::<
59160 _MM_MANT_NORM_1_2,
59161 _MM_MANT_SIGN_SRC,
59162 _MM_FROUND_CUR_DIRECTION,
59163 >(a, 0b11111111, a, b);
59164 let e = _mm_set_pd(20., 1.25);
59165 assert_eq_m128d(r, e);
59166 }
59167
59168 #[simd_test(enable = "avx512f")]
59169 unsafe fn test_mm_maskz_getmant_round_sd() {
59170 let a = _mm_set1_pd(20.);
59171 let b = _mm_set1_pd(10.);
59172 let r = _mm_maskz_getmant_round_sd::<
59173 _MM_MANT_NORM_1_2,
59174 _MM_MANT_SIGN_SRC,
59175 _MM_FROUND_CUR_DIRECTION,
59176 >(0, a, b);
59177 let e = _mm_set_pd(20., 0.);
59178 assert_eq_m128d(r, e);
59179 let r = _mm_maskz_getmant_round_sd::<
59180 _MM_MANT_NORM_1_2,
59181 _MM_MANT_SIGN_SRC,
59182 _MM_FROUND_CUR_DIRECTION,
59183 >(0b11111111, a, b);
59184 let e = _mm_set_pd(20., 1.25);
59185 assert_eq_m128d(r, e);
59186 }
59187
59188 #[simd_test(enable = "avx512f")]
59189 unsafe fn test_mm_roundscale_round_ss() {
59190 let a = _mm_set1_ps(2.2);
59191 let b = _mm_set1_ps(1.1);
59192 let r = _mm_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(a, b);
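// Rounding-control imm8 = 0 rounds to the nearest integer with scale 2^0, so the low element 1.1 becomes 1.0.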
59193 let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
59194 assert_eq_m128(r, e);
59195 }
59196
59197 #[simd_test(enable = "avx512f")]
59198 unsafe fn test_mm_mask_roundscale_round_ss() {
59199 let a = _mm_set1_ps(2.2);
59200 let b = _mm_set1_ps(1.1);
59201 let r = _mm_mask_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
59202 let e = _mm_set_ps(2.2, 2.2, 2.2, 2.2);
59203 assert_eq_m128(r, e);
59204 let r = _mm_mask_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
59205 let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
59206 assert_eq_m128(r, e);
59207 }
59208
59209 #[simd_test(enable = "avx512f")]
59210 unsafe fn test_mm_maskz_roundscale_round_ss() {
59211 let a = _mm_set1_ps(2.2);
59212 let b = _mm_set1_ps(1.1);
59213 let r = _mm_maskz_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(0, a, b);
59214 let e = _mm_set_ps(2.2, 2.2, 2.2, 0.0);
59215 assert_eq_m128(r, e);
59216 let r = _mm_maskz_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
59217 let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
59218 assert_eq_m128(r, e);
59219 }
59220
59221 #[simd_test(enable = "avx512f")]
59222 unsafe fn test_mm_roundscale_round_sd() {
59223 let a = _mm_set1_pd(2.2);
59224 let b = _mm_set1_pd(1.1);
59225 let r = _mm_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(a, b);
59226 let e = _mm_set_pd(2.2, 1.0);
59227 assert_eq_m128d(r, e);
59228 }
59229
59230 #[simd_test(enable = "avx512f")]
59231 unsafe fn test_mm_mask_roundscale_round_sd() {
59232 let a = _mm_set1_pd(2.2);
59233 let b = _mm_set1_pd(1.1);
59234 let r = _mm_mask_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
59235 let e = _mm_set_pd(2.2, 2.2);
59236 assert_eq_m128d(r, e);
59237 let r = _mm_mask_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
59238 let e = _mm_set_pd(2.2, 1.0);
59239 assert_eq_m128d(r, e);
59240 }
59241
59242 #[simd_test(enable = "avx512f")]
59243 unsafe fn test_mm_maskz_roundscale_round_sd() {
59244 let a = _mm_set1_pd(2.2);
59245 let b = _mm_set1_pd(1.1);
59246 let r = _mm_maskz_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(0, a, b);
59247 let e = _mm_set_pd(2.2, 0.0);
59248 assert_eq_m128d(r, e);
59249 let r = _mm_maskz_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
59250 let e = _mm_set_pd(2.2, 1.0);
59251 assert_eq_m128d(r, e);
59252 }
59253
59254 #[simd_test(enable = "avx512f")]
59255 unsafe fn test_mm_scalef_round_ss() {
59256 let a = _mm_set1_ps(1.);
59257 let b = _mm_set1_ps(3.);
59258 let r = _mm_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
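// scalef computes a * 2^floor(b) per element: 1.0 * 2^3 = 8.0 in the low lane.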
59259 let e = _mm_set_ps(1., 1., 1., 8.);
59260 assert_eq_m128(r, e);
59261 }
59262
59263 #[simd_test(enable = "avx512f")]
59264 unsafe fn test_mm_mask_scalef_round_ss() {
59265 let a = _mm_set1_ps(1.);
59266 let b = _mm_set1_ps(3.);
59267 let r = _mm_mask_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59268 a, 0, a, b,
59269 );
59270 let e = _mm_set_ps(1., 1., 1., 1.);
59271 assert_eq_m128(r, e);
59272 let r = _mm_mask_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59273 a, 0b11111111, a, b,
59274 );
59275 let e = _mm_set_ps(1., 1., 1., 8.);
59276 assert_eq_m128(r, e);
59277 }
59278
59279 #[simd_test(enable = "avx512f")]
59280 unsafe fn test_mm_maskz_scalef_round_ss() {
59281 let a = _mm_set1_ps(1.);
59282 let b = _mm_set1_ps(3.);
59283 let r =
59284 _mm_maskz_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
59285 let e = _mm_set_ps(1., 1., 1., 0.);
59286 assert_eq_m128(r, e);
59287 let r = _mm_maskz_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59288 0b11111111, a, b,
59289 );
59290 let e = _mm_set_ps(1., 1., 1., 8.);
59291 assert_eq_m128(r, e);
59292 }
59293
59294 #[simd_test(enable = "avx512f")]
59295 unsafe fn test_mm_scalef_round_sd() {
59296 let a = _mm_set1_pd(1.);
59297 let b = _mm_set1_pd(3.);
59298 let r = _mm_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
59299 let e = _mm_set_pd(1., 8.);
59300 assert_eq_m128d(r, e);
59301 }
59302
59303 #[simd_test(enable = "avx512f")]
59304 unsafe fn test_mm_mask_scalef_round_sd() {
59305 let a = _mm_set1_pd(1.);
59306 let b = _mm_set1_pd(3.);
59307 let r = _mm_mask_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59308 a, 0, a, b,
59309 );
59310 let e = _mm_set_pd(1., 1.);
59311 assert_eq_m128d(r, e);
59312 let r = _mm_mask_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59313 a, 0b11111111, a, b,
59314 );
59315 let e = _mm_set_pd(1., 8.);
59316 assert_eq_m128d(r, e);
59317 }
59318
59319 #[simd_test(enable = "avx512f")]
59320 unsafe fn test_mm_maskz_scalef_round_sd() {
59321 let a = _mm_set1_pd(1.);
59322 let b = _mm_set1_pd(3.);
59323 let r =
59324 _mm_maskz_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
59325 let e = _mm_set_pd(1., 0.);
59326 assert_eq_m128d(r, e);
59327 let r = _mm_maskz_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59328 0b11111111, a, b,
59329 );
59330 let e = _mm_set_pd(1., 8.);
59331 assert_eq_m128d(r, e);
59332 }
59333
59334 #[simd_test(enable = "avx512f")]
59335 unsafe fn test_mm_fmadd_round_ss() {
59336 let a = _mm_set1_ps(1.);
59337 let b = _mm_set1_ps(2.);
59338 let c = _mm_set1_ps(3.);
59339 let r = _mm_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
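// fmadd computes (a * b) + c in the low lane: (1. * 2.) + 3. = 5.; the upper lanes are copied from a.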
59340 let e = _mm_set_ps(1., 1., 1., 5.);
59341 assert_eq_m128(r, e);
59342 }
59343
59344 #[simd_test(enable = "avx512f")]
59345 unsafe fn test_mm_mask_fmadd_round_ss() {
59346 let a = _mm_set1_ps(1.);
59347 let b = _mm_set1_ps(2.);
59348 let c = _mm_set1_ps(3.);
59349 let r = _mm_mask_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59350 a, 0, b, c,
59351 );
59352 assert_eq_m128(r, a);
59353 let r = _mm_mask_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59354 a, 0b11111111, b, c,
59355 );
59356 let e = _mm_set_ps(1., 1., 1., 5.);
59357 assert_eq_m128(r, e);
59358 }
59359
59360 #[simd_test(enable = "avx512f")]
59361 unsafe fn test_mm_maskz_fmadd_round_ss() {
59362 let a = _mm_set1_ps(1.);
59363 let b = _mm_set1_ps(2.);
59364 let c = _mm_set1_ps(3.);
59365 let r = _mm_maskz_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59366 0, a, b, c,
59367 );
59368 let e = _mm_set_ps(1., 1., 1., 0.);
59369 assert_eq_m128(r, e);
59370 let r = _mm_maskz_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59371 0b11111111, a, b, c,
59372 );
59373 let e = _mm_set_ps(1., 1., 1., 5.);
59374 assert_eq_m128(r, e);
59375 }
59376
59377 #[simd_test(enable = "avx512f")]
59378 unsafe fn test_mm_mask3_fmadd_round_ss() {
59379 let a = _mm_set1_ps(1.);
59380 let b = _mm_set1_ps(2.);
59381 let c = _mm_set1_ps(3.);
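// The mask3 variants use c as the pass-through operand, so a zero mask returns c unchanged.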
59382 let r = _mm_mask3_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59383 a, b, c, 0,
59384 );
59385 assert_eq_m128(r, c);
59386 let r = _mm_mask3_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59387 a, b, c, 0b11111111,
59388 );
59389 let e = _mm_set_ps(3., 3., 3., 5.);
59390 assert_eq_m128(r, e);
59391 }
59392
59393 #[simd_test(enable = "avx512f")]
59394 unsafe fn test_mm_fmadd_round_sd() {
59395 let a = _mm_set1_pd(1.);
59396 let b = _mm_set1_pd(2.);
59397 let c = _mm_set1_pd(3.);
59398 let r = _mm_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
59399 let e = _mm_set_pd(1., 5.);
59400 assert_eq_m128d(r, e);
59401 }
59402
59403 #[simd_test(enable = "avx512f")]
59404 unsafe fn test_mm_mask_fmadd_round_sd() {
59405 let a = _mm_set1_pd(1.);
59406 let b = _mm_set1_pd(2.);
59407 let c = _mm_set1_pd(3.);
59408 let r = _mm_mask_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59409 a, 0, b, c,
59410 );
59411 assert_eq_m128d(r, a);
59412 let r = _mm_mask_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59413 a, 0b11111111, b, c,
59414 );
59415 let e = _mm_set_pd(1., 5.);
59416 assert_eq_m128d(r, e);
59417 }
59418
59419 #[simd_test(enable = "avx512f")]
59420 unsafe fn test_mm_maskz_fmadd_round_sd() {
59421 let a = _mm_set1_pd(1.);
59422 let b = _mm_set1_pd(2.);
59423 let c = _mm_set1_pd(3.);
59424 let r = _mm_maskz_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59425 0, a, b, c,
59426 );
59427 let e = _mm_set_pd(1., 0.);
59428 assert_eq_m128d(r, e);
59429 let r = _mm_maskz_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59430 0b11111111, a, b, c,
59431 );
59432 let e = _mm_set_pd(1., 5.);
59433 assert_eq_m128d(r, e);
59434 }
59435
59436 #[simd_test(enable = "avx512f")]
59437 unsafe fn test_mm_mask3_fmadd_round_sd() {
59438 let a = _mm_set1_pd(1.);
59439 let b = _mm_set1_pd(2.);
59440 let c = _mm_set1_pd(3.);
59441 let r = _mm_mask3_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59442 a, b, c, 0,
59443 );
59444 assert_eq_m128d(r, c);
59445 let r = _mm_mask3_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59446 a, b, c, 0b11111111,
59447 );
59448 let e = _mm_set_pd(3., 5.);
59449 assert_eq_m128d(r, e);
59450 }
59451
59452 #[simd_test(enable = "avx512f")]
59453 unsafe fn test_mm_fmsub_round_ss() {
59454 let a = _mm_set1_ps(1.);
59455 let b = _mm_set1_ps(2.);
59456 let c = _mm_set1_ps(3.);
59457 let r = _mm_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
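// fmsub computes (a * b) - c in the low lane: (1. * 2.) - 3. = -1.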
59458 let e = _mm_set_ps(1., 1., 1., -1.);
59459 assert_eq_m128(r, e);
59460 }
59461
59462 #[simd_test(enable = "avx512f")]
59463 unsafe fn test_mm_mask_fmsub_round_ss() {
59464 let a = _mm_set1_ps(1.);
59465 let b = _mm_set1_ps(2.);
59466 let c = _mm_set1_ps(3.);
59467 let r = _mm_mask_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59468 a, 0, b, c,
59469 );
59470 assert_eq_m128(r, a);
59471 let r = _mm_mask_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59472 a, 0b11111111, b, c,
59473 );
59474 let e = _mm_set_ps(1., 1., 1., -1.);
59475 assert_eq_m128(r, e);
59476 }
59477
59478 #[simd_test(enable = "avx512f")]
59479 unsafe fn test_mm_maskz_fmsub_round_ss() {
59480 let a = _mm_set1_ps(1.);
59481 let b = _mm_set1_ps(2.);
59482 let c = _mm_set1_ps(3.);
59483 let r = _mm_maskz_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59484 0, a, b, c,
59485 );
59486 let e = _mm_set_ps(1., 1., 1., 0.);
59487 assert_eq_m128(r, e);
59488 let r = _mm_maskz_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59489 0b11111111, a, b, c,
59490 );
59491 let e = _mm_set_ps(1., 1., 1., -1.);
59492 assert_eq_m128(r, e);
59493 }
59494
59495 #[simd_test(enable = "avx512f")]
59496 unsafe fn test_mm_mask3_fmsub_round_ss() {
59497 let a = _mm_set1_ps(1.);
59498 let b = _mm_set1_ps(2.);
59499 let c = _mm_set1_ps(3.);
59500 let r = _mm_mask3_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59501 a, b, c, 0,
59502 );
59503 assert_eq_m128(r, c);
59504 let r = _mm_mask3_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59505 a, b, c, 0b11111111,
59506 );
59507 let e = _mm_set_ps(3., 3., 3., -1.);
59508 assert_eq_m128(r, e);
59509 }
59510
59511 #[simd_test(enable = "avx512f")]
59512 unsafe fn test_mm_fmsub_round_sd() {
59513 let a = _mm_set1_pd(1.);
59514 let b = _mm_set1_pd(2.);
59515 let c = _mm_set1_pd(3.);
59516 let r = _mm_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
59517 let e = _mm_set_pd(1., -1.);
59518 assert_eq_m128d(r, e);
59519 }
59520
59521 #[simd_test(enable = "avx512f")]
59522 unsafe fn test_mm_mask_fmsub_round_sd() {
59523 let a = _mm_set1_pd(1.);
59524 let b = _mm_set1_pd(2.);
59525 let c = _mm_set1_pd(3.);
59526 let r = _mm_mask_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59527 a, 0, b, c,
59528 );
59529 assert_eq_m128d(r, a);
59530 let r = _mm_mask_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59531 a, 0b11111111, b, c,
59532 );
59533 let e = _mm_set_pd(1., -1.);
59534 assert_eq_m128d(r, e);
59535 }
59536
59537 #[simd_test(enable = "avx512f")]
59538 unsafe fn test_mm_maskz_fmsub_round_sd() {
59539 let a = _mm_set1_pd(1.);
59540 let b = _mm_set1_pd(2.);
59541 let c = _mm_set1_pd(3.);
59542 let r = _mm_maskz_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59543 0, a, b, c,
59544 );
59545 let e = _mm_set_pd(1., 0.);
59546 assert_eq_m128d(r, e);
59547 let r = _mm_maskz_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59548 0b11111111, a, b, c,
59549 );
59550 let e = _mm_set_pd(1., -1.);
59551 assert_eq_m128d(r, e);
59552 }
59553
59554 #[simd_test(enable = "avx512f")]
59555 unsafe fn test_mm_mask3_fmsub_round_sd() {
59556 let a = _mm_set1_pd(1.);
59557 let b = _mm_set1_pd(2.);
59558 let c = _mm_set1_pd(3.);
59559 let r = _mm_mask3_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59560 a, b, c, 0,
59561 );
59562 assert_eq_m128d(r, c);
59563 let r = _mm_mask3_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59564 a, b, c, 0b11111111,
59565 );
59566 let e = _mm_set_pd(3., -1.);
59567 assert_eq_m128d(r, e);
59568 }
59569
59570 #[simd_test(enable = "avx512f")]
59571 unsafe fn test_mm_fnmadd_round_ss() {
59572 let a = _mm_set1_ps(1.);
59573 let b = _mm_set1_ps(2.);
59574 let c = _mm_set1_ps(3.);
59575 let r = _mm_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
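// fnmadd computes -(a * b) + c in the low lane: -(1. * 2.) + 3. = 1.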
59576 let e = _mm_set_ps(1., 1., 1., 1.);
59577 assert_eq_m128(r, e);
59578 }
59579
59580 #[simd_test(enable = "avx512f")]
59581 unsafe fn test_mm_mask_fnmadd_round_ss() {
59582 let a = _mm_set1_ps(1.);
59583 let b = _mm_set1_ps(2.);
59584 let c = _mm_set1_ps(3.);
59585 let r = _mm_mask_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59586 a, 0, b, c,
59587 );
59588 assert_eq_m128(r, a);
59589 let r = _mm_mask_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59590 a, 0b11111111, b, c,
59591 );
59592 let e = _mm_set_ps(1., 1., 1., 1.);
59593 assert_eq_m128(r, e);
59594 }
59595
59596 #[simd_test(enable = "avx512f")]
59597 unsafe fn test_mm_maskz_fnmadd_round_ss() {
59598 let a = _mm_set1_ps(1.);
59599 let b = _mm_set1_ps(2.);
59600 let c = _mm_set1_ps(3.);
59601 let r = _mm_maskz_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59602 0, a, b, c,
59603 );
59604 let e = _mm_set_ps(1., 1., 1., 0.);
59605 assert_eq_m128(r, e);
59606 let r = _mm_maskz_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59607 0b11111111, a, b, c,
59608 );
59609 let e = _mm_set_ps(1., 1., 1., 1.);
59610 assert_eq_m128(r, e);
59611 }
59612
59613 #[simd_test(enable = "avx512f")]
59614 unsafe fn test_mm_mask3_fnmadd_round_ss() {
59615 let a = _mm_set1_ps(1.);
59616 let b = _mm_set1_ps(2.);
59617 let c = _mm_set1_ps(3.);
59618 let r = _mm_mask3_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59619 a, b, c, 0,
59620 );
59621 assert_eq_m128(r, c);
59622 let r = _mm_mask3_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59623 a, b, c, 0b11111111,
59624 );
59625 let e = _mm_set_ps(3., 3., 3., 1.);
59626 assert_eq_m128(r, e);
59627 }
59628
59629 #[simd_test(enable = "avx512f")]
59630 unsafe fn test_mm_fnmadd_round_sd() {
59631 let a = _mm_set1_pd(1.);
59632 let b = _mm_set1_pd(2.);
59633 let c = _mm_set1_pd(3.);
59634 let r = _mm_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
59635 let e = _mm_set_pd(1., 1.);
59636 assert_eq_m128d(r, e);
59637 }
59638
59639 #[simd_test(enable = "avx512f")]
59640 unsafe fn test_mm_mask_fnmadd_round_sd() {
59641 let a = _mm_set1_pd(1.);
59642 let b = _mm_set1_pd(2.);
59643 let c = _mm_set1_pd(3.);
59644 let r = _mm_mask_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59645 a, 0, b, c,
59646 );
59647 assert_eq_m128d(r, a);
59648 let r = _mm_mask_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59649 a, 0b11111111, b, c,
59650 );
59651 let e = _mm_set_pd(1., 1.);
59652 assert_eq_m128d(r, e);
59653 }
59654
59655 #[simd_test(enable = "avx512f")]
59656 unsafe fn test_mm_maskz_fnmadd_round_sd() {
59657 let a = _mm_set1_pd(1.);
59658 let b = _mm_set1_pd(2.);
59659 let c = _mm_set1_pd(3.);
59660 let r = _mm_maskz_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59661 0, a, b, c,
59662 );
59663 let e = _mm_set_pd(1., 0.);
59664 assert_eq_m128d(r, e);
59665 let r = _mm_maskz_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59666 0b11111111, a, b, c,
59667 );
59668 let e = _mm_set_pd(1., 1.);
59669 assert_eq_m128d(r, e);
59670 }
59671
59672 #[simd_test(enable = "avx512f")]
59673 unsafe fn test_mm_mask3_fnmadd_round_sd() {
59674 let a = _mm_set1_pd(1.);
59675 let b = _mm_set1_pd(2.);
59676 let c = _mm_set1_pd(3.);
59677 let r = _mm_mask3_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59678 a, b, c, 0,
59679 );
59680 assert_eq_m128d(r, c);
59681 let r = _mm_mask3_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59682 a, b, c, 0b11111111,
59683 );
59684 let e = _mm_set_pd(3., 1.);
59685 assert_eq_m128d(r, e);
59686 }
59687
59688 #[simd_test(enable = "avx512f")]
59689 unsafe fn test_mm_fnmsub_round_ss() {
59690 let a = _mm_set1_ps(1.);
59691 let b = _mm_set1_ps(2.);
59692 let c = _mm_set1_ps(3.);
59693 let r = _mm_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
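// fnmsub computes -(a * b) - c in the low lane: -(1. * 2.) - 3. = -5.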
59694 let e = _mm_set_ps(1., 1., 1., -5.);
59695 assert_eq_m128(r, e);
59696 }
59697
59698 #[simd_test(enable = "avx512f")]
59699 unsafe fn test_mm_mask_fnmsub_round_ss() {
59700 let a = _mm_set1_ps(1.);
59701 let b = _mm_set1_ps(2.);
59702 let c = _mm_set1_ps(3.);
59703 let r = _mm_mask_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59704 a, 0, b, c,
59705 );
59706 assert_eq_m128(r, a);
59707 let r = _mm_mask_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59708 a, 0b11111111, b, c,
59709 );
59710 let e = _mm_set_ps(1., 1., 1., -5.);
59711 assert_eq_m128(r, e);
59712 }
59713
59714 #[simd_test(enable = "avx512f")]
59715 unsafe fn test_mm_maskz_fnmsub_round_ss() {
59716 let a = _mm_set1_ps(1.);
59717 let b = _mm_set1_ps(2.);
59718 let c = _mm_set1_ps(3.);
59719 let r = _mm_maskz_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59720 0, a, b, c,
59721 );
59722 let e = _mm_set_ps(1., 1., 1., 0.);
59723 assert_eq_m128(r, e);
59724 let r = _mm_maskz_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59725 0b11111111, a, b, c,
59726 );
59727 let e = _mm_set_ps(1., 1., 1., -5.);
59728 assert_eq_m128(r, e);
59729 }
59730
59731 #[simd_test(enable = "avx512f")]
59732 unsafe fn test_mm_mask3_fnmsub_round_ss() {
59733 let a = _mm_set1_ps(1.);
59734 let b = _mm_set1_ps(2.);
59735 let c = _mm_set1_ps(3.);
59736 let r = _mm_mask3_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59737 a, b, c, 0,
59738 );
59739 assert_eq_m128(r, c);
59740 let r = _mm_mask3_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59741 a, b, c, 0b11111111,
59742 );
59743 let e = _mm_set_ps(3., 3., 3., -5.);
59744 assert_eq_m128(r, e);
59745 }
59746
59747 #[simd_test(enable = "avx512f")]
59748 unsafe fn test_mm_fnmsub_round_sd() {
59749 let a = _mm_set1_pd(1.);
59750 let b = _mm_set1_pd(2.);
59751 let c = _mm_set1_pd(3.);
59752 let r = _mm_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
59753 let e = _mm_set_pd(1., -5.);
59754 assert_eq_m128d(r, e);
59755 }
59756
59757 #[simd_test(enable = "avx512f")]
59758 unsafe fn test_mm_mask_fnmsub_round_sd() {
59759 let a = _mm_set1_pd(1.);
59760 let b = _mm_set1_pd(2.);
59761 let c = _mm_set1_pd(3.);
59762 let r = _mm_mask_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59763 a, 0, b, c,
59764 );
59765 assert_eq_m128d(r, a);
59766 let r = _mm_mask_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59767 a, 0b11111111, b, c,
59768 );
59769 let e = _mm_set_pd(1., -5.);
59770 assert_eq_m128d(r, e);
59771 }
59772
59773 #[simd_test(enable = "avx512f")]
59774 unsafe fn test_mm_maskz_fnmsub_round_sd() {
59775 let a = _mm_set1_pd(1.);
59776 let b = _mm_set1_pd(2.);
59777 let c = _mm_set1_pd(3.);
59778 let r = _mm_maskz_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59779 0, a, b, c,
59780 );
59781 let e = _mm_set_pd(1., 0.);
59782 assert_eq_m128d(r, e);
59783 let r = _mm_maskz_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59784 0b11111111, a, b, c,
59785 );
59786 let e = _mm_set_pd(1., -5.);
59787 assert_eq_m128d(r, e);
59788 }
59789
59790 #[simd_test(enable = "avx512f")]
59791 unsafe fn test_mm_mask3_fnmsub_round_sd() {
59792 let a = _mm_set1_pd(1.);
59793 let b = _mm_set1_pd(2.);
59794 let c = _mm_set1_pd(3.);
59795 let r = _mm_mask3_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59796 a, b, c, 0,
59797 );
59798 assert_eq_m128d(r, c);
59799 let r = _mm_mask3_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59800 a, b, c, 0b11111111,
59801 );
59802 let e = _mm_set_pd(3., -5.);
59803 assert_eq_m128d(r, e);
59804 }
59805
59806 #[simd_test(enable = "avx512f")]
59807 unsafe fn test_mm_fixupimm_ss() {
59808 let a = _mm_set_ps(0., 0., 0., f32::NAN);
59809 let b = _mm_set1_ps(f32::MAX);
59810 let c = _mm_set1_epi32(i32::MAX);
59811 let r = _mm_fixupimm_ss::<5>(a, b, c);
59812 let e = _mm_set_ps(0., 0., 0., -0.0);
59813 assert_eq_m128(r, e);
59814 }
59815
59816 #[simd_test(enable = "avx512f")]
59817 unsafe fn test_mm_mask_fixupimm_ss() {
59818 let a = _mm_set_ps(0., 0., 0., f32::NAN);
59819 let b = _mm_set1_ps(f32::MAX);
59820 let c = _mm_set1_epi32(i32::MAX);
59821 let r = _mm_mask_fixupimm_ss::<5>(a, 0b11111111, b, c);
59822 let e = _mm_set_ps(0., 0., 0., -0.0);
59823 assert_eq_m128(r, e);
59824 }
59825
59826 #[simd_test(enable = "avx512f")]
59827 unsafe fn test_mm_maskz_fixupimm_ss() {
59828 let a = _mm_set_ps(0., 0., 0., f32::NAN);
59829 let b = _mm_set1_ps(f32::MAX);
59830 let c = _mm_set1_epi32(i32::MAX);
59831 let r = _mm_maskz_fixupimm_ss::<5>(0b00000000, a, b, c);
59832 let e = _mm_set_ps(0., 0., 0., 0.0);
59833 assert_eq_m128(r, e);
59834 let r = _mm_maskz_fixupimm_ss::<5>(0b11111111, a, b, c);
59835 let e = _mm_set_ps(0., 0., 0., -0.0);
59836 assert_eq_m128(r, e);
59837 }
59838
59839 #[simd_test(enable = "avx512f")]
59840 unsafe fn test_mm_fixupimm_sd() {
59841 let a = _mm_set_pd(0., f64::NAN);
59842 let b = _mm_set1_pd(f64::MAX);
59843 let c = _mm_set1_epi64x(i32::MAX as i64);
59844 let r = _mm_fixupimm_sd::<5>(a, b, c);
59845 let e = _mm_set_pd(0., -0.0);
59846 assert_eq_m128d(r, e);
59847 }
59848
59849 #[simd_test(enable = "avx512f")]
59850 unsafe fn test_mm_mask_fixupimm_sd() {
59851 let a = _mm_set_pd(0., f64::NAN);
59852 let b = _mm_set1_pd(f64::MAX);
59853 let c = _mm_set1_epi64x(i32::MAX as i64);
59854 let r = _mm_mask_fixupimm_sd::<5>(a, 0b11111111, b, c);
59855 let e = _mm_set_pd(0., -0.0);
59856 assert_eq_m128d(r, e);
59857 }
59858
59859 #[simd_test(enable = "avx512f")]
59860 unsafe fn test_mm_maskz_fixupimm_sd() {
59861 let a = _mm_set_pd(0., f64::NAN);
59862 let b = _mm_set1_pd(f64::MAX);
59863 let c = _mm_set1_epi64x(i32::MAX as i64);
59864 let r = _mm_maskz_fixupimm_sd::<5>(0b00000000, a, b, c);
59865 let e = _mm_set_pd(0., 0.0);
59866 assert_eq_m128d(r, e);
59867 let r = _mm_maskz_fixupimm_sd::<5>(0b11111111, a, b, c);
59868 let e = _mm_set_pd(0., -0.0);
59869 assert_eq_m128d(r, e);
59870 }
59871
59872 #[simd_test(enable = "avx512f")]
59873 unsafe fn test_mm_fixupimm_round_ss() {
59874 let a = _mm_set_ps(1., 0., 0., f32::NAN);
59875 let b = _mm_set1_ps(f32::MAX);
59876 let c = _mm_set1_epi32(i32::MAX);
59877 let r = _mm_fixupimm_round_ss::<5, _MM_FROUND_CUR_DIRECTION>(a, b, c);
59878 let e = _mm_set_ps(1., 0., 0., -0.0);
59879 assert_eq_m128(r, e);
59880 }
59881
59882 #[simd_test(enable = "avx512f")]
59883 unsafe fn test_mm_mask_fixupimm_round_ss() {
59884 let a = _mm_set_ps(0., 0., 0., f32::NAN);
59885 let b = _mm_set1_ps(f32::MAX);
59886 let c = _mm_set1_epi32(i32::MAX);
59887 let r = _mm_mask_fixupimm_round_ss::<5, _MM_FROUND_CUR_DIRECTION>(a, 0b11111111, b, c);
59888 let e = _mm_set_ps(0., 0., 0., -0.0);
59889 assert_eq_m128(r, e);
59890 }
59891
59892 #[simd_test(enable = "avx512f")]
59893 unsafe fn test_mm_maskz_fixupimm_round_ss() {
59894 let a = _mm_set_ps(0., 0., 0., f32::NAN);
59895 let b = _mm_set1_ps(f32::MAX);
59896 let c = _mm_set1_epi32(i32::MAX);
59897 let r = _mm_maskz_fixupimm_round_ss::<5, _MM_FROUND_CUR_DIRECTION>(0b00000000, a, b, c);
59898 let e = _mm_set_ps(0., 0., 0., 0.0);
59899 assert_eq_m128(r, e);
59900 let r = _mm_maskz_fixupimm_round_ss::<5, _MM_FROUND_CUR_DIRECTION>(0b11111111, a, b, c);
59901 let e = _mm_set_ps(0., 0., 0., -0.0);
59902 assert_eq_m128(r, e);
59903 }
59904
59905 #[simd_test(enable = "avx512f")]
59906 unsafe fn test_mm_fixupimm_round_sd() {
59907 let a = _mm_set_pd(0., f64::NAN);
59908 let b = _mm_set1_pd(f64::MAX);
59909 let c = _mm_set1_epi64x(i32::MAX as i64);
59910 let r = _mm_fixupimm_round_sd::<5, _MM_FROUND_CUR_DIRECTION>(a, b, c);
59911 let e = _mm_set_pd(0., -0.0);
59912 assert_eq_m128d(r, e);
59913 }
59914
59915 #[simd_test(enable = "avx512f")]
59916 unsafe fn test_mm_mask_fixupimm_round_sd() {
59917 let a = _mm_set_pd(0., f64::NAN);
59918 let b = _mm_set1_pd(f64::MAX);
59919 let c = _mm_set1_epi64x(i32::MAX as i64);
59920 let r = _mm_mask_fixupimm_round_sd::<5, _MM_FROUND_CUR_DIRECTION>(a, 0b11111111, b, c);
59921 let e = _mm_set_pd(0., -0.0);
59922 assert_eq_m128d(r, e);
59923 }
59924
59925 #[simd_test(enable = "avx512f")]
59926 unsafe fn test_mm_maskz_fixupimm_round_sd() {
59927 let a = _mm_set_pd(0., f64::NAN);
59928 let b = _mm_set1_pd(f64::MAX);
59929 let c = _mm_set1_epi64x(i32::MAX as i64);
59930 let r = _mm_maskz_fixupimm_round_sd::<5, _MM_FROUND_CUR_DIRECTION>(0b00000000, a, b, c);
59931 let e = _mm_set_pd(0., 0.0);
59932 assert_eq_m128d(r, e);
59933 let r = _mm_maskz_fixupimm_round_sd::<5, _MM_FROUND_CUR_DIRECTION>(0b11111111, a, b, c);
59934 let e = _mm_set_pd(0., -0.0);
59935 assert_eq_m128d(r, e);
59936 }
59937
59938 #[simd_test(enable = "avx512f")]
59939 unsafe fn test_mm_mask_cvtss_sd() {
59940 let a = _mm_set_pd(6., -7.5);
59941 let b = _mm_set_ps(0., -0.5, 1., -1.5);
59942 let r = _mm_mask_cvtss_sd(a, 0, a, b);
59943 assert_eq_m128d(r, a);
59944 let r = _mm_mask_cvtss_sd(a, 0b11111111, a, b);
59945 let e = _mm_set_pd(6., -1.5);
59946 assert_eq_m128d(r, e);
59947 }
59948
59949 #[simd_test(enable = "avx512f")]
59950 unsafe fn test_mm_maskz_cvtss_sd() {
59951 let a = _mm_set_pd(6., -7.5);
59952 let b = _mm_set_ps(0., -0.5, 1., -1.5);
59953 let r = _mm_maskz_cvtss_sd(0, a, b);
59954 let e = _mm_set_pd(6., 0.);
59955 assert_eq_m128d(r, e);
59956 let r = _mm_maskz_cvtss_sd(0b11111111, a, b);
59957 let e = _mm_set_pd(6., -1.5);
59958 assert_eq_m128d(r, e);
59959 }
59960
59961 #[simd_test(enable = "avx512f")]
59962 unsafe fn test_mm_mask_cvtsd_ss() {
59963 let a = _mm_set_ps(0., -0.5, 1., -1.5);
59964 let b = _mm_set_pd(6., -7.5);
59965 let r = _mm_mask_cvtsd_ss(a, 0, a, b);
59966 assert_eq_m128(r, a);
59967 let r = _mm_mask_cvtsd_ss(a, 0b11111111, a, b);
59968 let e = _mm_set_ps(0., -0.5, 1., -7.5);
59969 assert_eq_m128(r, e);
59970 }
59971
59972 #[simd_test(enable = "avx512f")]
59973 unsafe fn test_mm_maskz_cvtsd_ss() {
59974 let a = _mm_set_ps(0., -0.5, 1., -1.5);
59975 let b = _mm_set_pd(6., -7.5);
59976 let r = _mm_maskz_cvtsd_ss(0, a, b);
59977 let e = _mm_set_ps(0., -0.5, 1., 0.);
59978 assert_eq_m128(r, e);
59979 let r = _mm_maskz_cvtsd_ss(0b11111111, a, b);
59980 let e = _mm_set_ps(0., -0.5, 1., -7.5);
59981 assert_eq_m128(r, e);
59982 }
59983
59984 #[simd_test(enable = "avx512f")]
59985 unsafe fn test_mm_cvt_roundss_sd() {
59986 let a = _mm_set_pd(6., -7.5);
59987 let b = _mm_set_ps(0., -0.5, 1., -1.5);
59988 let r = _mm_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
59989 let e = _mm_set_pd(6., -1.5);
59990 assert_eq_m128d(r, e);
59991 }
59992
59993 #[simd_test(enable = "avx512f")]
59994 unsafe fn test_mm_mask_cvt_roundss_sd() {
59995 let a = _mm_set_pd(6., -7.5);
59996 let b = _mm_set_ps(0., -0.5, 1., -1.5);
59997 let r = _mm_mask_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
59998 assert_eq_m128d(r, a);
59999 let r = _mm_mask_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
60000 let e = _mm_set_pd(6., -1.5);
60001 assert_eq_m128d(r, e);
60002 }
60003
60004 #[simd_test(enable = "avx512f")]
60005 unsafe fn test_mm_maskz_cvt_roundss_sd() {
60006 let a = _mm_set_pd(6., -7.5);
60007 let b = _mm_set_ps(0., -0.5, 1., -1.5);
60008 let r = _mm_maskz_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
60009 let e = _mm_set_pd(6., 0.);
60010 assert_eq_m128d(r, e);
60011 let r = _mm_maskz_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
60012 let e = _mm_set_pd(6., -1.5);
60013 assert_eq_m128d(r, e);
60014 }
60015
60016 #[simd_test(enable = "avx512f")]
60017 unsafe fn test_mm_cvt_roundsd_ss() {
60018 let a = _mm_set_ps(0., -0.5, 1., -1.5);
60019 let b = _mm_set_pd(6., -7.5);
60020 let r = _mm_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
60021 let e = _mm_set_ps(0., -0.5, 1., -7.5);
60022 assert_eq_m128(r, e);
60023 }
60024
60025 #[simd_test(enable = "avx512f")]
60026 unsafe fn test_mm_mask_cvt_roundsd_ss() {
60027 let a = _mm_set_ps(0., -0.5, 1., -1.5);
60028 let b = _mm_set_pd(6., -7.5);
60029 let r = _mm_mask_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, 0, a, b);
60030 assert_eq_m128(r, a);
60031 let r = _mm_mask_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
60032 a, 0b11111111, a, b,
60033 );
60034 let e = _mm_set_ps(0., -0.5, 1., -7.5);
60035 assert_eq_m128(r, e);
60036 }
60037
60038 #[simd_test(enable = "avx512f")]
60039 unsafe fn test_mm_maskz_cvt_roundsd_ss() {
60040 let a = _mm_set_ps(0., -0.5, 1., -1.5);
60041 let b = _mm_set_pd(6., -7.5);
60042 let r = _mm_maskz_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
60043 let e = _mm_set_ps(0., -0.5, 1., 0.);
60044 assert_eq_m128(r, e);
60045 let r = _mm_maskz_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
60046 0b11111111, a, b,
60047 );
60048 let e = _mm_set_ps(0., -0.5, 1., -7.5);
60049 assert_eq_m128(r, e);
60050 }
60051
60052 #[simd_test(enable = "avx512f")]
60053 unsafe fn test_mm_cvt_roundss_si32() {
60054 let a = _mm_set_ps(0., -0.5, 1., -1.5);
60055 let r = _mm_cvt_roundss_si32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
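// _MM_FROUND_TO_ZERO truncates toward zero, so -1.5 converts to -1.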
60056 let e: i32 = -1;
60057 assert_eq!(r, e);
60058 }
60059
60060 #[simd_test(enable = "avx512f")]
60061 unsafe fn test_mm_cvt_roundss_i32() {
60062 let a = _mm_set_ps(0., -0.5, 1., -1.5);
60063 let r = _mm_cvt_roundss_i32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
60064 let e: i32 = -1;
60065 assert_eq!(r, e);
60066 }
60067
60068 #[simd_test(enable = "avx512f")]
60069 unsafe fn test_mm_cvt_roundss_u32() {
60070 let a = _mm_set_ps(0., -0.5, 1., -1.5);
60071 let r = _mm_cvt_roundss_u32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
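// A negative source is out of range for an unsigned conversion; the AVX-512 conversion returns the unsigned integer indefinite value (all bits set, i.e. u32::MAX) in that case.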
60072 let e: u32 = u32::MAX;
60073 assert_eq!(r, e);
60074 }
60075
60076 #[simd_test(enable = "avx512f")]
60077 unsafe fn test_mm_cvtss_i32() {
60078 let a = _mm_set_ps(0., -0.5, 1., -1.5);
60079 let r = _mm_cvtss_i32(a);
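// Without an explicit rounding override the default round-to-nearest-even mode applies, so -1.5 converts to -2.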
60080 let e: i32 = -2;
60081 assert_eq!(r, e);
60082 }
60083
60084 #[simd_test(enable = "avx512f")]
60085 unsafe fn test_mm_cvtss_u32() {
60086 let a = _mm_set_ps(0., -0.5, 1., -1.5);
60087 let r = _mm_cvtss_u32(a);
60088 let e: u32 = u32::MAX;
60089 assert_eq!(r, e);
60090 }
60091
60092 #[simd_test(enable = "avx512f")]
60093 unsafe fn test_mm_cvt_roundsd_si32() {
60094 let a = _mm_set_pd(1., -1.5);
60095 let r = _mm_cvt_roundsd_si32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
60096 let e: i32 = -1;
60097 assert_eq!(r, e);
60098 }
60099
60100 #[simd_test(enable = "avx512f")]
60101 unsafe fn test_mm_cvt_roundsd_i32() {
60102 let a = _mm_set_pd(1., -1.5);
60103 let r = _mm_cvt_roundsd_i32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
60104 let e: i32 = -1;
60105 assert_eq!(r, e);
60106 }
60107
60108 #[simd_test(enable = "avx512f")]
60109 unsafe fn test_mm_cvt_roundsd_u32() {
60110 let a = _mm_set_pd(1., -1.5);
60111 let r = _mm_cvt_roundsd_u32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
60112 let e: u32 = u32::MAX;
60113 assert_eq!(r, e);
60114 }
60115
60116 #[simd_test(enable = "avx512f")]
60117 unsafe fn test_mm_cvtsd_i32() {
60118 let a = _mm_set_pd(1., -1.5);
60119 let r = _mm_cvtsd_i32(a);
60120 let e: i32 = -2;
60121 assert_eq!(r, e);
60122 }
60123
60124 #[simd_test(enable = "avx512f")]
60125 unsafe fn test_mm_cvtsd_u32() {
60126 let a = _mm_set_pd(1., -1.5);
60127 let r = _mm_cvtsd_u32(a);
60128 let e: u32 = u32::MAX;
60129 assert_eq!(r, e);
60130 }
60131
60132 #[simd_test(enable = "avx512f")]
60133 unsafe fn test_mm_cvt_roundi32_ss() {
60134 let a = _mm_set_ps(0., -0.5, 1., -1.5);
60135 let b: i32 = 9;
60136 let r = _mm_cvt_roundi32_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
60137 let e = _mm_set_ps(0., -0.5, 1., 9.);
60138 assert_eq_m128(r, e);
60139 }
60140
60141 #[simd_test(enable = "avx512f")]
60142 unsafe fn test_mm_cvt_roundsi32_ss() {
60143 let a = _mm_set_ps(0., -0.5, 1., -1.5);
60144 let b: i32 = 9;
60145 let r = _mm_cvt_roundsi32_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
60146 let e = _mm_set_ps(0., -0.5, 1., 9.);
60147 assert_eq_m128(r, e);
60148 }
60149
60150 #[simd_test(enable = "avx512f")]
60151 unsafe fn test_mm_cvt_roundu32_ss() {
60152 let a = _mm_set_ps(0., -0.5, 1., -1.5);
60153 let b: u32 = 9;
60154 let r = _mm_cvt_roundu32_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
60155 let e = _mm_set_ps(0., -0.5, 1., 9.);
60156 assert_eq_m128(r, e);
60157 }
60158
60159 #[simd_test(enable = "avx512f")]
60160 unsafe fn test_mm_cvti32_ss() {
60161 let a = _mm_set_ps(0., -0.5, 1., -1.5);
60162 let b: i32 = 9;
60163 let r = _mm_cvti32_ss(a, b);
60164 let e = _mm_set_ps(0., -0.5, 1., 9.);
60165 assert_eq_m128(r, e);
60166 }
60167
60168 #[simd_test(enable = "avx512f")]
60169 unsafe fn test_mm_cvti32_sd() {
60170 let a = _mm_set_pd(1., -1.5);
60171 let b: i32 = 9;
60172 let r = _mm_cvti32_sd(a, b);
60173 let e = _mm_set_pd(1., 9.);
60174 assert_eq_m128d(r, e);
60175 }
60176
60177 #[simd_test(enable = "avx512f")]
60178 unsafe fn test_mm_cvtt_roundss_si32() {
60179 let a = _mm_set_ps(0., -0.5, 1., -1.5);
60180 let r = _mm_cvtt_roundss_si32::<_MM_FROUND_NO_EXC>(a);
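// The cvtt* conversions always truncate toward zero, so -1.5 converts to -1 regardless of the rounding mode.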
60181 let e: i32 = -1;
60182 assert_eq!(r, e);
60183 }
60184
60185 #[simd_test(enable = "avx512f")]
60186 unsafe fn test_mm_cvtt_roundss_i32() {
60187 let a = _mm_set_ps(0., -0.5, 1., -1.5);
60188 let r = _mm_cvtt_roundss_i32::<_MM_FROUND_NO_EXC>(a);
60189 let e: i32 = -1;
60190 assert_eq!(r, e);
60191 }
60192
60193 #[simd_test(enable = "avx512f")]
60194 unsafe fn test_mm_cvtt_roundss_u32() {
60195 let a = _mm_set_ps(0., -0.5, 1., -1.5);
60196 let r = _mm_cvtt_roundss_u32::<_MM_FROUND_NO_EXC>(a);
60197 let e: u32 = u32::MAX;
60198 assert_eq!(r, e);
60199 }
60200
60201 #[simd_test(enable = "avx512f")]
60202 unsafe fn test_mm_cvttss_i32() {
60203 let a = _mm_set_ps(0., -0.5, 1., -1.5);
60204 let r = _mm_cvttss_i32(a);
60205 let e: i32 = -1;
60206 assert_eq!(r, e);
60207 }
60208
60209 #[simd_test(enable = "avx512f")]
60210 unsafe fn test_mm_cvttss_u32() {
60211 let a = _mm_set_ps(0., -0.5, 1., -1.5);
60212 let r = _mm_cvttss_u32(a);
60213 let e: u32 = u32::MAX;
60214 assert_eq!(r, e);
60215 }
60216
60217 #[simd_test(enable = "avx512f")]
60218 unsafe fn test_mm_cvtt_roundsd_si32() {
60219 let a = _mm_set_pd(1., -1.5);
60220 let r = _mm_cvtt_roundsd_si32::<_MM_FROUND_NO_EXC>(a);
60221 let e: i32 = -1;
60222 assert_eq!(r, e);
60223 }
60224
60225 #[simd_test(enable = "avx512f")]
60226 unsafe fn test_mm_cvtt_roundsd_i32() {
60227 let a = _mm_set_pd(1., -1.5);
60228 let r = _mm_cvtt_roundsd_i32::<_MM_FROUND_NO_EXC>(a);
60229 let e: i32 = -1;
60230 assert_eq!(r, e);
60231 }
60232
60233 #[simd_test(enable = "avx512f")]
60234 unsafe fn test_mm_cvtt_roundsd_u32() {
60235 let a = _mm_set_pd(1., -1.5);
60236 let r = _mm_cvtt_roundsd_u32::<_MM_FROUND_NO_EXC>(a);
60237 let e: u32 = u32::MAX;
60238 assert_eq!(r, e);
60239 }
60240
60241 #[simd_test(enable = "avx512f")]
60242 unsafe fn test_mm_cvttsd_i32() {
60243 let a = _mm_set_pd(1., -1.5);
60244 let r = _mm_cvttsd_i32(a);
60245 let e: i32 = -1;
60246 assert_eq!(r, e);
60247 }
60248
60249 #[simd_test(enable = "avx512f")]
60250 unsafe fn test_mm_cvttsd_u32() {
60251 let a = _mm_set_pd(1., -1.5);
60252 let r = _mm_cvttsd_u32(a);
60253 let e: u32 = u32::MAX;
60254 assert_eq!(r, e);
60255 }
60256
60257 #[simd_test(enable = "avx512f")]
60258 unsafe fn test_mm_cvtu32_ss() {
60259 let a = _mm_set_ps(0., -0.5, 1., -1.5);
60260 let b: u32 = 9;
60261 let r = _mm_cvtu32_ss(a, b);
60262 let e = _mm_set_ps(0., -0.5, 1., 9.);
60263 assert_eq_m128(r, e);
60264 }
60265
60266 #[simd_test(enable = "avx512f")]
60267 unsafe fn test_mm_cvtu32_sd() {
60268 let a = _mm_set_pd(1., -1.5);
60269 let b: u32 = 9;
60270 let r = _mm_cvtu32_sd(a, b);
60271 let e = _mm_set_pd(1., 9.);
60272 assert_eq_m128d(r, e);
60273 }
60274
60275 #[simd_test(enable = "avx512f")]
60276 unsafe fn test_mm_comi_round_ss() {
60277 let a = _mm_set1_ps(2.2);
60278 let b = _mm_set1_ps(1.1);
60279 let r = _mm_comi_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(a, b);
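// Predicate imm8 = 0 is the ordered, non-signaling equality test; 2.2 == 1.1 is false, so the comparison returns 0.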
60280 let e: i32 = 0;
60281 assert_eq!(r, e);
60282 }
60283
60284 #[simd_test(enable = "avx512f")]
60285 unsafe fn test_mm_comi_round_sd() {
60286 let a = _mm_set1_pd(2.2);
60287 let b = _mm_set1_pd(1.1);
60288 let r = _mm_comi_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(a, b);
60289 let e: i32 = 0;
60290 assert_eq!(r, e);
60291 }
60292
60293 #[simd_test(enable = "avx512f")]
60294 unsafe fn test_mm512_cvtsi512_si32() {
60295 let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
60296 let r = _mm512_cvtsi512_si32(a);
60297 let e: i32 = 1;
60298 assert_eq!(r, e);
60299 }
60300
60301 #[simd_test(enable = "avx512f")]
60302 unsafe fn test_mm512_cvtss_f32() {
60303 let a = _mm512_setr_ps(
60304 312.0134, 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
60305 );
60306 assert_eq!(_mm512_cvtss_f32(a), 312.0134);
60307 }
60308
60309 #[simd_test(enable = "avx512f")]
60310 unsafe fn test_mm512_cvtsd_f64() {
60311 let r = _mm512_cvtsd_f64(_mm512_setr_pd(-1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8));
60312 assert_eq!(r, -1.1);
60313 }
60314
60315 #[simd_test(enable = "avx512f")]
60316 unsafe fn test_mm512_shuffle_pd() {
60317 let a = _mm512_setr_pd(1., 4., 5., 8., 1., 4., 5., 8.);
60318 let b = _mm512_setr_pd(2., 3., 6., 7., 2., 3., 6., 7.);
60319 let r = _mm512_shuffle_pd::<0b11_11_11_11>(a, b);
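// Within each 128-bit lane the control bits pick element 0 or 1 of a (even result lanes) and of b (odd result lanes); 0b11_11_11_11 selects the high element of every pair, interleaving a and b.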
60320 let e = _mm512_setr_pd(4., 3., 8., 7., 4., 3., 8., 7.);
60321 assert_eq_m512d(r, e);
60322 }
60323
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_shuffle_pd() {
        let a = _mm512_setr_pd(1., 4., 5., 8., 1., 4., 5., 8.);
        let b = _mm512_setr_pd(2., 3., 6., 7., 2., 3., 6., 7.);
        let r = _mm512_mask_shuffle_pd::<0b11_11_11_11>(a, 0, a, b);
        assert_eq_m512d(r, a);
        let r = _mm512_mask_shuffle_pd::<0b11_11_11_11>(a, 0b11111111, a, b);
        let e = _mm512_setr_pd(4., 3., 8., 7., 4., 3., 8., 7.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_shuffle_pd() {
        let a = _mm512_setr_pd(1., 4., 5., 8., 1., 4., 5., 8.);
        let b = _mm512_setr_pd(2., 3., 6., 7., 2., 3., 6., 7.);
        let r = _mm512_maskz_shuffle_pd::<0b11_11_11_11>(0, a, b);
        assert_eq_m512d(r, _mm512_setzero_pd());
        let r = _mm512_maskz_shuffle_pd::<0b11_11_11_11>(0b00001111, a, b);
        let e = _mm512_setr_pd(4., 3., 8., 7., 0., 0., 0., 0.);
        assert_eq_m512d(r, e);
    }

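    // Expand-load tests: contiguous elements are read from memory and expanded
    // into the destination lanes whose mask bit is set (lowest bit -> lowest lane);
    // lanes with a clear bit take the corresponding `src` element (mask variants)
    // or zero (maskz variants).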
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_expandloadu_epi32() {
        let src = _mm512_set1_epi32(42);
        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
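        // 8 of the 16 mask bits are set, so the first 8 array elements (1..=8) are consumed.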
        let r = _mm512_mask_expandloadu_epi32(src, m, black_box(p));
        let e = _mm512_set_epi32(8, 7, 6, 42, 5, 42, 42, 42, 4, 3, 42, 42, 2, 42, 1, 42);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_expandloadu_epi32() {
        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm512_maskz_expandloadu_epi32(m, black_box(p));
        let e = _mm512_set_epi32(8, 7, 6, 0, 5, 0, 0, 0, 4, 3, 0, 0, 2, 0, 1, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_expandloadu_epi32() {
        let src = _mm256_set1_epi32(42);
        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm256_mask_expandloadu_epi32(src, m, black_box(p));
        let e = _mm256_set_epi32(4, 3, 2, 42, 1, 42, 42, 42);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_expandloadu_epi32() {
        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm256_maskz_expandloadu_epi32(m, black_box(p));
        let e = _mm256_set_epi32(4, 3, 2, 0, 1, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_expandloadu_epi32() {
        let src = _mm_set1_epi32(42);
        let a = &[1_i32, 2, 3, 4];
        let p = a.as_ptr();
        let m = 0b11111000;
        let r = _mm_mask_expandloadu_epi32(src, m, black_box(p));
        let e = _mm_set_epi32(1, 42, 42, 42);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_expandloadu_epi32() {
        let a = &[1_i32, 2, 3, 4];
        let p = a.as_ptr();
        let m = 0b11111000;
        let r = _mm_maskz_expandloadu_epi32(m, black_box(p));
        let e = _mm_set_epi32(1, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_expandloadu_epi64() {
        let src = _mm512_set1_epi64(42);
        let a = &[1_i64, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm512_mask_expandloadu_epi64(src, m, black_box(p));
        let e = _mm512_set_epi64(4, 3, 2, 42, 1, 42, 42, 42);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_expandloadu_epi64() {
        let a = &[1_i64, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm512_maskz_expandloadu_epi64(m, black_box(p));
        let e = _mm512_set_epi64(4, 3, 2, 0, 1, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_expandloadu_epi64() {
        let src = _mm256_set1_epi64x(42);
        let a = &[1_i64, 2, 3, 4];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm256_mask_expandloadu_epi64(src, m, black_box(p));
        let e = _mm256_set_epi64x(1, 42, 42, 42);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_expandloadu_epi64() {
        let a = &[1_i64, 2, 3, 4];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm256_maskz_expandloadu_epi64(m, black_box(p));
        let e = _mm256_set_epi64x(1, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_expandloadu_epi64() {
        let src = _mm_set1_epi64x(42);
        let a = &[1_i64, 2];
        let p = a.as_ptr();
        let m = 0b11101000;
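        // Only the low two mask bits matter for a 2-lane vector; both are clear here,
        // so nothing is loaded and both lanes keep `src`.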
        let r = _mm_mask_expandloadu_epi64(src, m, black_box(p));
        let e = _mm_set_epi64x(42, 42);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_expandloadu_epi64() {
        let a = &[1_i64, 2];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm_maskz_expandloadu_epi64(m, black_box(p));
        let e = _mm_set_epi64x(0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_expandloadu_ps() {
        let src = _mm512_set1_ps(42.);
        let a = &[
            1.0f32, 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        ];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm512_mask_expandloadu_ps(src, m, black_box(p));
        let e = _mm512_set_ps(
            8., 7., 6., 42., 5., 42., 42., 42., 4., 3., 42., 42., 2., 42., 1., 42.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_expandloadu_ps() {
        let a = &[
            1.0f32, 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        ];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm512_maskz_expandloadu_ps(m, black_box(p));
        let e = _mm512_set_ps(
            8., 7., 6., 0., 5., 0., 0., 0., 4., 3., 0., 0., 2., 0., 1., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_expandloadu_ps() {
        let src = _mm256_set1_ps(42.);
        let a = &[1.0f32, 2., 3., 4., 5., 6., 7., 8.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm256_mask_expandloadu_ps(src, m, black_box(p));
        let e = _mm256_set_ps(4., 3., 2., 42., 1., 42., 42., 42.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_expandloadu_ps() {
        let a = &[1.0f32, 2., 3., 4., 5., 6., 7., 8.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm256_maskz_expandloadu_ps(m, black_box(p));
        let e = _mm256_set_ps(4., 3., 2., 0., 1., 0., 0., 0.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_expandloadu_ps() {
        let src = _mm_set1_ps(42.);
        let a = &[1.0f32, 2., 3., 4.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm_mask_expandloadu_ps(src, m, black_box(p));
        let e = _mm_set_ps(1., 42., 42., 42.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_expandloadu_ps() {
        let a = &[1.0f32, 2., 3., 4.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm_maskz_expandloadu_ps(m, black_box(p));
        let e = _mm_set_ps(1., 0., 0., 0.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_expandloadu_pd() {
        let src = _mm512_set1_pd(42.);
        let a = &[1.0f64, 2., 3., 4., 5., 6., 7., 8.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm512_mask_expandloadu_pd(src, m, black_box(p));
        let e = _mm512_set_pd(4., 3., 2., 42., 1., 42., 42., 42.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_expandloadu_pd() {
        let a = &[1.0f64, 2., 3., 4., 5., 6., 7., 8.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm512_maskz_expandloadu_pd(m, black_box(p));
        let e = _mm512_set_pd(4., 3., 2., 0., 1., 0., 0., 0.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_expandloadu_pd() {
        let src = _mm256_set1_pd(42.);
        let a = &[1.0f64, 2., 3., 4.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm256_mask_expandloadu_pd(src, m, black_box(p));
        let e = _mm256_set_pd(1., 42., 42., 42.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_expandloadu_pd() {
        let a = &[1.0f64, 2., 3., 4.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm256_maskz_expandloadu_pd(m, black_box(p));
        let e = _mm256_set_pd(1., 0., 0., 0.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_expandloadu_pd() {
        let src = _mm_set1_pd(42.);
        let a = &[1.0f64, 2.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm_mask_expandloadu_pd(src, m, black_box(p));
        let e = _mm_set_pd(42., 42.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_expandloadu_pd() {
        let a = &[1.0f64, 2.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm_maskz_expandloadu_pd(m, black_box(p));
        let e = _mm_set_pd(0., 0.);
        assert_eq_m128d(r, e);
    }
}